1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -stop-after=si-fix-sgpr-copies -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s
5 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
6 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
7 ; GFX908: bb.0 (%ir-block.0):
8 ; GFX908-NEXT: successors: %bb.1(0x80000000)
9 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
11 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
12 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
13 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
14 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
15 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
16 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
17 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
18 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
19 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
20 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
21 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
22 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
23 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
24 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
27 ; GFX908-NEXT: successors: %bb.2(0x80000000)
29 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
30 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
31 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
32 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
33 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
34 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
35 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
36 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
37 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
38 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
39 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
40 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
41 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
42 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
45 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
47 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
48 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
49 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
52 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
53 ; GFX908-NEXT: SI_RETURN
54 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
59 define void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
60 ; GFX908-LABEL: name: raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
61 ; GFX908: bb.0 (%ir-block.0):
62 ; GFX908-NEXT: successors: %bb.1(0x80000000)
63 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
65 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr7
66 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr6
67 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr5
68 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr4
69 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr3
70 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2
71 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
72 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
73 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
74 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
75 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
76 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
77 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
78 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
79 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
80 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
81 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
82 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
83 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
86 ; GFX908-NEXT: successors: %bb.2(0x80000000)
88 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
89 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
90 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
91 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
92 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
93 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
94 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
95 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
96 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
97 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
98 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
99 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
100 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
101 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
102 ; GFX908-NEXT: {{ $}}
104 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
105 ; GFX908-NEXT: {{ $}}
106 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE4]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s64), align 1, addrspace 8)
107 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
108 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
109 ; GFX908-NEXT: {{ $}}
111 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
112 ; GFX908-NEXT: SI_RETURN
113 call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
118 define void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
119 ; GFX908-LABEL: name: raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
120 ; GFX908: bb.0 (%ir-block.0):
121 ; GFX908-NEXT: successors: %bb.1(0x80000000)
122 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
123 ; GFX908-NEXT: {{ $}}
124 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8
125 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr7
126 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr6
127 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr5
128 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
129 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
130 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
131 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
132 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
133 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
134 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
135 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
136 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
137 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
138 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
139 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
140 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
141 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1, [[COPY6]], %subreg.sub2
142 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]]
143 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
144 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
145 ; GFX908-NEXT: {{ $}}
147 ; GFX908-NEXT: successors: %bb.2(0x80000000)
148 ; GFX908-NEXT: {{ $}}
149 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
150 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
151 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
152 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
153 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
154 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
155 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
156 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
157 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
158 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
159 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
160 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
161 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
162 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
163 ; GFX908-NEXT: {{ $}}
165 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
166 ; GFX908-NEXT: {{ $}}
167 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[COPY9]], [[COPY1]], killed [[REG_SEQUENCE4]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 8)
168 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
169 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
170 ; GFX908-NEXT: {{ $}}
172 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
173 ; GFX908-NEXT: SI_RETURN
174 call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
179 define void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
180 ; GFX908-LABEL: name: raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset
181 ; GFX908: bb.0 (%ir-block.0):
182 ; GFX908-NEXT: successors: %bb.1(0x80000000)
183 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
184 ; GFX908-NEXT: {{ $}}
185 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr9
186 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8
187 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr7
188 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr6
189 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr5
190 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr4
191 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3
192 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr2
193 ; GFX908-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr1
194 ; GFX908-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr0
195 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
196 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
197 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
198 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
199 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
200 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
201 ; GFX908-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
202 ; GFX908-NEXT: [[DEF6:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
203 ; GFX908-NEXT: [[DEF7:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
204 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY8]], %subreg.sub1, [[COPY7]], %subreg.sub2, [[COPY6]], %subreg.sub3
205 ; GFX908-NEXT: [[COPY10:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE1]]
206 ; GFX908-NEXT: [[DEF8:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
207 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
208 ; GFX908-NEXT: {{ $}}
210 ; GFX908-NEXT: successors: %bb.2(0x80000000)
211 ; GFX908-NEXT: {{ $}}
212 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
213 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
214 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
215 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
216 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
217 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
218 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
219 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
220 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
221 ; GFX908-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
222 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
223 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
224 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
225 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
226 ; GFX908-NEXT: {{ $}}
228 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
229 ; GFX908-NEXT: {{ $}}
230 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[COPY10]], [[COPY1]], killed [[REG_SEQUENCE4]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 8)
231 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
232 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
233 ; GFX908-NEXT: {{ $}}
235 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
236 ; GFX908-NEXT: SI_RETURN
237 call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
241 ; Copies for VGPR arguments
242 define void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
243 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffset
244 ; GFX908: bb.0 (%ir-block.0):
245 ; GFX908-NEXT: successors: %bb.1(0x80000000)
246 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
247 ; GFX908-NEXT: {{ $}}
248 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
249 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
250 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
251 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
252 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
253 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
254 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
255 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
256 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
257 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
258 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
259 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
260 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
261 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
262 ; GFX908-NEXT: {{ $}}
264 ; GFX908-NEXT: successors: %bb.2(0x80000000)
265 ; GFX908-NEXT: {{ $}}
266 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
267 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
268 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
269 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
270 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
271 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
272 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
273 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
274 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
275 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
276 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
277 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
278 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
279 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
280 ; GFX908-NEXT: {{ $}}
282 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
283 ; GFX908-NEXT: {{ $}}
284 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
285 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
286 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
287 ; GFX908-NEXT: {{ $}}
289 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
290 ; GFX908-NEXT: SI_RETURN
291 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
296 define void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
297 ; GFX908-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffset
298 ; GFX908: bb.0 (%ir-block.0):
299 ; GFX908-NEXT: successors: %bb.1(0x80000000)
300 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
301 ; GFX908-NEXT: {{ $}}
302 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
303 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
304 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
305 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
306 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
307 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
308 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
309 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
310 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
311 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
312 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
313 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
314 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
315 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
316 ; GFX908-NEXT: {{ $}}
318 ; GFX908-NEXT: successors: %bb.2(0x80000000)
319 ; GFX908-NEXT: {{ $}}
320 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
321 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
322 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
323 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
324 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
325 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
326 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
327 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
328 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
329 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
330 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
331 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
332 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
333 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
334 ; GFX908-NEXT: {{ $}}
336 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
337 ; GFX908-NEXT: {{ $}}
338 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
339 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
340 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
341 ; GFX908-NEXT: {{ $}}
343 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
344 ; GFX908-NEXT: SI_RETURN
345 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 1)
349 ; Waterfall for rsrc and soffset
350 define void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
351 ; GFX908-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffset
352 ; GFX908: bb.0 (%ir-block.0):
353 ; GFX908-NEXT: successors: %bb.1(0x80000000)
354 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
355 ; GFX908-NEXT: {{ $}}
356 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
357 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
358 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
359 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
360 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
361 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
362 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
363 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
364 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
365 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
366 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
367 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
368 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
369 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
370 ; GFX908-NEXT: {{ $}}
372 ; GFX908-NEXT: successors: %bb.2(0x80000000)
373 ; GFX908-NEXT: {{ $}}
374 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
375 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
376 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
377 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
378 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
379 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
380 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
381 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
382 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
383 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
384 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
385 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
386 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
387 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
388 ; GFX908-NEXT: {{ $}}
390 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
391 ; GFX908-NEXT: {{ $}}
392 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
393 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
394 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
395 ; GFX908-NEXT: {{ $}}
397 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
398 ; GFX908-NEXT: SI_RETURN
399 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
403 ; Waterfall for rsrc and soffset, copy for voffset
404 define void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
405 ; GFX908-LABEL: name: raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soffset
406 ; GFX908: bb.0 (%ir-block.0):
407 ; GFX908-NEXT: successors: %bb.1(0x80000000)
408 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
409 ; GFX908-NEXT: {{ $}}
410 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
411 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
412 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
413 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
414 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
415 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
416 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
417 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
418 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
419 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
420 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
421 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
422 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
423 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
424 ; GFX908-NEXT: {{ $}}
426 ; GFX908-NEXT: successors: %bb.2(0x80000000)
427 ; GFX908-NEXT: {{ $}}
428 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
429 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
430 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
431 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
432 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
433 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
434 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
435 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
436 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
437 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
438 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
439 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
440 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
441 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
442 ; GFX908-NEXT: {{ $}}
444 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
445 ; GFX908-NEXT: {{ $}}
446 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
447 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
448 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
449 ; GFX908-NEXT: {{ $}}
451 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
452 ; GFX908-NEXT: SI_RETURN
453 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
457 ; Natural mapping + glc
458 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
459 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
460 ; GFX908: bb.0 (%ir-block.0):
461 ; GFX908-NEXT: successors: %bb.1(0x80000000)
462 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
463 ; GFX908-NEXT: {{ $}}
464 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
465 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
466 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
467 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
468 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
469 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
470 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
471 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
472 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
473 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
474 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
475 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
476 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
477 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
478 ; GFX908-NEXT: {{ $}}
480 ; GFX908-NEXT: successors: %bb.2(0x80000000)
481 ; GFX908-NEXT: {{ $}}
482 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
483 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
484 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
485 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
486 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
487 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
488 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
489 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
490 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
491 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
492 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
493 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
494 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
495 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
496 ; GFX908-NEXT: {{ $}}
498 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
499 ; GFX908-NEXT: {{ $}}
500 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 1, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
501 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
502 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
503 ; GFX908-NEXT: {{ $}}
505 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
506 ; GFX908-NEXT: SI_RETURN
507 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
511 ; Natural mapping + slc
512 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
513 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
514 ; GFX908: bb.0 (%ir-block.0):
515 ; GFX908-NEXT: successors: %bb.1(0x80000000)
516 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
517 ; GFX908-NEXT: {{ $}}
518 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
519 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
520 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
521 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
522 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
523 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
524 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
525 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
526 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
527 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
528 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
529 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
530 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
531 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
532 ; GFX908-NEXT: {{ $}}
534 ; GFX908-NEXT: successors: %bb.2(0x80000000)
535 ; GFX908-NEXT: {{ $}}
536 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
537 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
538 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
539 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
540 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
541 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
542 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
543 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
544 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
545 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
546 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
547 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
548 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
549 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
550 ; GFX908-NEXT: {{ $}}
552 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
553 ; GFX908-NEXT: {{ $}}
554 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 2, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
555 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
556 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
557 ; GFX908-NEXT: {{ $}}
559 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
560 ; GFX908-NEXT: SI_RETURN
561 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
565 ; Natural mapping + glc + slc
566 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
567 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
568 ; GFX908: bb.0 (%ir-block.0):
569 ; GFX908-NEXT: successors: %bb.1(0x80000000)
570 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
571 ; GFX908-NEXT: {{ $}}
572 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
573 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
574 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
575 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
576 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
577 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
578 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
579 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
580 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
581 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
582 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
583 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
584 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
585 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
586 ; GFX908-NEXT: {{ $}}
588 ; GFX908-NEXT: successors: %bb.2(0x80000000)
589 ; GFX908-NEXT: {{ $}}
590 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
591 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
592 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
593 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
594 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
595 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
596 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
597 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
598 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
599 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
600 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
601 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
602 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
603 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
604 ; GFX908-NEXT: {{ $}}
606 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
607 ; GFX908-NEXT: {{ $}}
608 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 3, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
609 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
610 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
611 ; GFX908-NEXT: {{ $}}
613 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
614 ; GFX908-NEXT: SI_RETURN
615 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
619 ; Natural mapping + dlc
620 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
621 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
622 ; GFX908: bb.0 (%ir-block.0):
623 ; GFX908-NEXT: successors: %bb.1(0x80000000)
624 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
625 ; GFX908-NEXT: {{ $}}
626 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
627 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
628 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
629 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
630 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
631 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
632 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
633 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
634 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
635 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
636 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
637 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
638 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
639 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
640 ; GFX908-NEXT: {{ $}}
642 ; GFX908-NEXT: successors: %bb.2(0x80000000)
643 ; GFX908-NEXT: {{ $}}
644 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
645 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
646 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
647 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
648 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
649 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
650 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
651 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
652 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
653 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
654 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
655 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
656 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
657 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
658 ; GFX908-NEXT: {{ $}}
660 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
661 ; GFX908-NEXT: {{ $}}
662 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 78, 4, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
663 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
664 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
665 ; GFX908-NEXT: {{ $}}
667 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
668 ; GFX908-NEXT: SI_RETURN
669 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
673 define void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0(float %val, <4 x i32> %rsrc, i32 %soffset) {
674 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffset__voffset0
675 ; GFX908: bb.0 (%ir-block.0):
676 ; GFX908-NEXT: successors: %bb.1(0x80000000)
677 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
678 ; GFX908-NEXT: {{ $}}
679 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
680 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
681 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
682 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
683 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
684 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
685 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
686 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
687 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
688 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
689 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
690 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
691 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
692 ; GFX908-NEXT: {{ $}}
694 ; GFX908-NEXT: successors: %bb.2(0x80000000)
695 ; GFX908-NEXT: {{ $}}
696 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
697 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
698 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
699 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
700 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
701 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
702 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
703 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
704 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
705 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
706 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
707 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
708 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
709 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
710 ; GFX908-NEXT: {{ $}}
712 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
713 ; GFX908-NEXT: {{ $}}
714 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY5]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
715 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
716 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
717 ; GFX908-NEXT: {{ $}}
719 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
720 ; GFX908-NEXT: SI_RETURN
721 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0)
725 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095(float %val, <4 x i32> %rsrc, i32 %soffset) {
726 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4095
727 ; GFX908: bb.0 (%ir-block.0):
728 ; GFX908-NEXT: successors: %bb.1(0x80000000)
729 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
730 ; GFX908-NEXT: {{ $}}
731 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
732 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
733 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
734 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
735 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
736 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
737 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
738 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
739 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
740 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
741 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
742 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
743 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
744 ; GFX908-NEXT: {{ $}}
746 ; GFX908-NEXT: successors: %bb.2(0x80000000)
747 ; GFX908-NEXT: {{ $}}
748 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
749 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
750 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
751 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
752 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
753 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
754 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
755 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
756 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
757 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
758 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
759 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
760 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
761 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
762 ; GFX908-NEXT: {{ $}}
764 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
765 ; GFX908-NEXT: {{ $}}
766 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY5]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
767 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
768 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
769 ; GFX908-NEXT: {{ $}}
771 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
772 ; GFX908-NEXT: SI_RETURN
773 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0)
777 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096(float %val, <4 x i32> %rsrc, i32 %soffset) {
778 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset4096
779 ; GFX908: bb.0 (%ir-block.0):
780 ; GFX908-NEXT: successors: %bb.1(0x80000000)
781 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
782 ; GFX908-NEXT: {{ $}}
783 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
784 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
785 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
786 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
787 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
788 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
789 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
790 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
791 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
792 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
793 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
794 ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
795 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
796 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
797 ; GFX908-NEXT: {{ $}}
799 ; GFX908-NEXT: successors: %bb.2(0x80000000)
800 ; GFX908-NEXT: {{ $}}
801 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
802 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
803 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
804 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
805 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
806 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
807 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
808 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
809 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
810 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
811 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
812 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
813 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
814 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
815 ; GFX908-NEXT: {{ $}}
817 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
818 ; GFX908-NEXT: {{ $}}
819 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY5]], [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
820 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
821 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
822 ; GFX908-NEXT: {{ $}}
824 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
825 ; GFX908-NEXT: SI_RETURN
826 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0)
830 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) {
831 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add16
832 ; GFX908: bb.0 (%ir-block.0):
833 ; GFX908-NEXT: successors: %bb.1(0x80000000)
834 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
835 ; GFX908-NEXT: {{ $}}
836 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
837 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
838 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
839 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
840 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
841 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
842 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
843 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
844 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
845 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
846 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
847 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
848 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
849 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
850 ; GFX908-NEXT: {{ $}}
852 ; GFX908-NEXT: successors: %bb.2(0x80000000)
853 ; GFX908-NEXT: {{ $}}
854 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
855 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
856 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
857 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
858 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
859 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
860 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
861 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
862 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
863 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
864 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
865 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
866 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
867 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
868 ; GFX908-NEXT: {{ $}}
870 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
871 ; GFX908-NEXT: {{ $}}
872 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 16, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
873 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
874 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
875 ; GFX908-NEXT: {{ $}}
877 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
878 ; GFX908-NEXT: SI_RETURN
879 %voffset = add i32 %voffset.base, 16
880 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
884 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) {
885 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
886 ; GFX908: bb.0 (%ir-block.0):
887 ; GFX908-NEXT: successors: %bb.1(0x80000000)
888 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
889 ; GFX908-NEXT: {{ $}}
890 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
891 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
892 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
893 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
894 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
895 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
896 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
897 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
898 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
899 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
900 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
901 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
902 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
903 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
904 ; GFX908-NEXT: {{ $}}
906 ; GFX908-NEXT: successors: %bb.2(0x80000000)
907 ; GFX908-NEXT: {{ $}}
908 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
909 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
910 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
911 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
912 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
913 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
914 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
915 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
916 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
917 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
918 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
919 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
920 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
921 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
922 ; GFX908-NEXT: {{ $}}
924 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
925 ; GFX908-NEXT: {{ $}}
926 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 4095, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
927 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
928 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
929 ; GFX908-NEXT: {{ $}}
931 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
932 ; GFX908-NEXT: SI_RETURN
933 %voffset = add i32 %voffset.base, 4095
934 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
938 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) {
939 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4096
940 ; GFX908: bb.0 (%ir-block.0):
941 ; GFX908-NEXT: successors: %bb.1(0x80000000)
942 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
943 ; GFX908-NEXT: {{ $}}
944 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
945 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
946 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
947 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
948 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
949 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
950 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
951 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
952 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
953 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
954 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
955 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
956 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
957 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec
958 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
959 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
960 ; GFX908-NEXT: {{ $}}
962 ; GFX908-NEXT: successors: %bb.2(0x80000000)
963 ; GFX908-NEXT: {{ $}}
964 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
965 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
966 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
967 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
968 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
969 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
970 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
971 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
972 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
973 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
974 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
975 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
976 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
977 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
978 ; GFX908-NEXT: {{ $}}
980 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
981 ; GFX908-NEXT: {{ $}}
982 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
983 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
984 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
985 ; GFX908-NEXT: {{ $}}
987 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
988 ; GFX908-NEXT: SI_RETURN
989 %voffset = add i32 %voffset.base, 4096
990 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
994 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095(float %val, <4 x i32> %rsrc, i32 %voffset) {
995 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4095
996 ; GFX908: bb.0 (%ir-block.0):
997 ; GFX908-NEXT: successors: %bb.1(0x80000000)
998 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
999 ; GFX908-NEXT: {{ $}}
1000 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1001 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1002 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1003 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1004 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1005 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1006 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1007 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1008 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1009 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1010 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
1011 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
1012 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1013 ; GFX908-NEXT: {{ $}}
1014 ; GFX908-NEXT: bb.1:
1015 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1016 ; GFX908-NEXT: {{ $}}
1017 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1018 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1019 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1020 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1021 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1022 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1023 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1024 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1025 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1026 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1027 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1028 ; GFX908-NEXT: {{ $}}
1029 ; GFX908-NEXT: bb.2:
1030 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1031 ; GFX908-NEXT: {{ $}}
1032 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY5]], [[COPY]], killed [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1033 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1034 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1035 ; GFX908-NEXT: {{ $}}
1036 ; GFX908-NEXT: bb.3:
1037 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1038 ; GFX908-NEXT: SI_RETURN
1039 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0)
1043 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096(float %val, <4 x i32> %rsrc, i32 %voffset) {
1044 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset4096
1045 ; GFX908: bb.0 (%ir-block.0):
1046 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1047 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
1048 ; GFX908-NEXT: {{ $}}
1049 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1050 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1051 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1052 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1053 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1054 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1055 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1056 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1057 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1058 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1059 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
1060 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
1061 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1062 ; GFX908-NEXT: {{ $}}
1063 ; GFX908-NEXT: bb.1:
1064 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1065 ; GFX908-NEXT: {{ $}}
1066 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1067 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1068 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1069 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1070 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1071 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1072 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1073 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1074 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1075 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1076 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
1077 ; GFX908-NEXT: {{ $}}
1078 ; GFX908-NEXT: bb.2:
1079 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1080 ; GFX908-NEXT: {{ $}}
1081 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY5]], [[COPY]], killed [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1082 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1083 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1084 ; GFX908-NEXT: {{ $}}
1085 ; GFX908-NEXT: bb.3:
1086 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1087 ; GFX908-NEXT: SI_RETURN
1088 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0)
1092 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) {
1093 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add16
1094 ; GFX908: bb.0 (%ir-block.0):
1095 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1096 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
1097 ; GFX908-NEXT: {{ $}}
1098 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
1099 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1100 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1101 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1102 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1103 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1104 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1105 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1106 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1107 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1108 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1109 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1110 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
1111 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec
1112 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1113 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1114 ; GFX908-NEXT: {{ $}}
1115 ; GFX908-NEXT: bb.1:
1116 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1117 ; GFX908-NEXT: {{ $}}
1118 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1119 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1120 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1121 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1122 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1123 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1124 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1125 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1126 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1127 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1128 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec
1129 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec
1130 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
1131 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
1132 ; GFX908-NEXT: {{ $}}
1133 ; GFX908-NEXT: bb.2:
1134 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1135 ; GFX908-NEXT: {{ $}}
1136 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1137 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1138 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1139 ; GFX908-NEXT: {{ $}}
1140 ; GFX908-NEXT: bb.3:
1141 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1142 ; GFX908-NEXT: SI_RETURN
1143 %soffset = add i32 %soffset.base, 16
1144 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
1148 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) {
1149 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4095
1150 ; GFX908: bb.0 (%ir-block.0):
1151 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1152 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
1153 ; GFX908-NEXT: {{ $}}
1154 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
1155 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1156 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1157 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1158 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1159 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1160 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1161 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1162 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1163 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1164 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1165 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1166 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
1167 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec
1168 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1169 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1170 ; GFX908-NEXT: {{ $}}
1171 ; GFX908-NEXT: bb.1:
1172 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1173 ; GFX908-NEXT: {{ $}}
1174 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1175 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1176 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1177 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1178 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1179 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1180 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1181 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1182 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1183 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1184 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec
1185 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec
1186 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
1187 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
1188 ; GFX908-NEXT: {{ $}}
1189 ; GFX908-NEXT: bb.2:
1190 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1191 ; GFX908-NEXT: {{ $}}
1192 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1193 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1194 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1195 ; GFX908-NEXT: {{ $}}
1196 ; GFX908-NEXT: bb.3:
1197 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1198 ; GFX908-NEXT: SI_RETURN
1199 %soffset = add i32 %soffset.base, 4095
1200 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
1204 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) {
1205 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add4096
1206 ; GFX908: bb.0 (%ir-block.0):
1207 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1208 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
1209 ; GFX908-NEXT: {{ $}}
1210 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
1211 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1212 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1213 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1214 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1215 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1216 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1217 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1218 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1219 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1220 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1221 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1222 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
1223 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec
1224 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1225 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1226 ; GFX908-NEXT: {{ $}}
1227 ; GFX908-NEXT: bb.1:
1228 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1229 ; GFX908-NEXT: {{ $}}
1230 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1231 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1232 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1233 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1234 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1235 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1236 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1237 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1238 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1239 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1240 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec
1241 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec
1242 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
1243 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
1244 ; GFX908-NEXT: {{ $}}
1245 ; GFX908-NEXT: bb.2:
1246 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1247 ; GFX908-NEXT: {{ $}}
1248 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1249 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1250 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1251 ; GFX908-NEXT: {{ $}}
1252 ; GFX908-NEXT: bb.3:
1253 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1254 ; GFX908-NEXT: SI_RETURN
1255 %soffset = add i32 %soffset.base, 4096
1256 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
1260 ; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
1261 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset.base) {
1262 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_soffset_add5000
1263 ; GFX908: bb.0 (%ir-block.0):
1264 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1265 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
1266 ; GFX908-NEXT: {{ $}}
1267 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
1268 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1269 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1270 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1271 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1272 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1273 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1274 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1275 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1276 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1277 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1278 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1279 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000
1280 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], killed [[S_MOV_B32_]], 0, implicit $exec
1281 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1282 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1283 ; GFX908-NEXT: {{ $}}
1284 ; GFX908-NEXT: bb.1:
1285 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1286 ; GFX908-NEXT: {{ $}}
1287 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1288 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1289 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1290 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1291 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1292 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1293 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1294 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1295 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1296 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1297 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[V_ADD_U32_e64_]], implicit $exec
1298 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[V_ADD_U32_e64_]], implicit $exec
1299 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
1300 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
1301 ; GFX908-NEXT: {{ $}}
1302 ; GFX908-NEXT: bb.2:
1303 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1304 ; GFX908-NEXT: {{ $}}
1305 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1306 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1307 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1308 ; GFX908-NEXT: {{ $}}
1309 ; GFX908-NEXT: bb.3:
1310 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1311 ; GFX908-NEXT: SI_RETURN
1312 %soffset = add i32 %soffset.base, 5000
1313 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
1317 ; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
1318 define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000(float %val, <4 x i32> %rsrc, i32 %voffset.base, i32 %soffset) {
1319 ; GFX908-LABEL: name: raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset_add5000
1320 ; GFX908: bb.0 (%ir-block.0):
1321 ; GFX908-NEXT: successors: %bb.1(0x80000000)
1322 ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
1323 ; GFX908-NEXT: {{ $}}
1324 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr6
1325 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr5
1326 ; GFX908-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
1327 ; GFX908-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
1328 ; GFX908-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
1329 ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
1330 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
1331 ; GFX908-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1332 ; GFX908-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1333 ; GFX908-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1334 ; GFX908-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
1335 ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
1336 ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
1337 ; GFX908-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY1]], killed [[S_MOV_B32_]], 0, implicit $exec
1338 ; GFX908-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
1339 ; GFX908-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
1340 ; GFX908-NEXT: {{ $}}
1341 ; GFX908-NEXT: bb.1:
1342 ; GFX908-NEXT: successors: %bb.2(0x80000000)
1343 ; GFX908-NEXT: {{ $}}
1344 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec
1345 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec
1346 ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
1347 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec
1348 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec
1349 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec
1350 ; GFX908-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
1351 ; GFX908-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec
1352 ; GFX908-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc
1353 ; GFX908-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
1354 ; GFX908-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[COPY]], implicit $exec
1355 ; GFX908-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY]], implicit $exec
1356 ; GFX908-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def $scc
1357 ; GFX908-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
1358 ; GFX908-NEXT: {{ $}}
1359 ; GFX908-NEXT: bb.2:
1360 ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000)
1361 ; GFX908-NEXT: {{ $}}
1362 ; GFX908-NEXT: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY6]], [[V_ADD_U32_e64_]], killed [[REG_SEQUENCE3]], killed [[V_READFIRSTLANE_B32_4]], 904, 94, 0, 0, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 8)
1363 ; GFX908-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
1364 ; GFX908-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
1365 ; GFX908-NEXT: {{ $}}
1366 ; GFX908-NEXT: bb.3:
1367 ; GFX908-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
1368 ; GFX908-NEXT: SI_RETURN
1369 %voffset = add i32 %voffset.base, 5000
1370 call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
1374 declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
1375 declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
1376 declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
1377 declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)