1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
7 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; Test: load of <3 x i32> from ptr addrspace(4) with align 1, where the
; pointer is a normal (non-inreg) argument and is addressed via v[0:1] in the
; expected output.
; Expected codegen, per the checks below:
;   - gfx900 / hawaii with +unaligned-access-mode: one dwordx3 load.
;   - gfx900 / hawaii with -unaligned-access-mode, and tahiti: twelve ubyte
;     loads (offsets 0..11) recombined into three dwords with shifts and ORs.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
9 define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) {
10 ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
11 ; GFX9-UNALIGNED: ; %bb.0:
12 ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13 ; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
14 ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
15 ; GFX9-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
17 ; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
18 ; GFX9-NOUNALIGNED: ; %bb.0:
19 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[0:1], off
21 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v[0:1], off offset:1
22 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[0:1], off offset:2
23 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[0:1], off offset:3
24 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[0:1], off offset:4
25 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[0:1], off offset:5
26 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[0:1], off offset:6
27 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[0:1], off offset:7
28 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[0:1], off offset:8
29 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[0:1], off offset:9
30 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[0:1], off offset:11
31 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v13, v[0:1], off offset:10
32 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10)
33 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 8, v2
34 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9)
35 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v4
36 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8)
37 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 24, v5
38 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v0
39 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6)
40 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v3, v7, 8, v6
41 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
42 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v8
43 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
44 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v9
45 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v3
46 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
47 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v10
48 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
49 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v12
50 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
51 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v13
52 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v6
53 ; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
55 ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1:
56 ; GFX7-UNALIGNED: ; %bb.0:
57 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s6, 0
59 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s7, 0xf000
60 ; GFX7-UNALIGNED-NEXT: s_mov_b64 s[4:5], 0
61 ; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
62 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
63 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
65 ; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1:
66 ; GFX7-NOUNALIGNED: ; %bb.0:
67 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0
69 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000
70 ; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0
71 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64 offset:1
72 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:3
73 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2
74 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:5
75 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:7
76 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:6
77 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:9
78 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:11
79 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:10
80 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64
81 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:4
82 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8
83 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11)
84 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v2
85 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10)
86 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v3
87 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9)
88 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4
89 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8)
90 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 8, v5
91 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7)
92 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v6
93 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6)
94 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v7
95 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
96 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 8, v8
97 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
98 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v9
99 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3)
100 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v10
101 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
102 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v11
103 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v3
104 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
105 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v3, v4, v12
106 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v5, v6
107 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
108 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v5, v7, v0
109 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v6, v8, v9
110 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v2, v1
111 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v3
112 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v6, v5
113 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
115 ; GFX6-LABEL: v_load_constant_v3i32_align1:
117 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX6-NEXT: s_mov_b32 s6, 0
119 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
120 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
121 ; GFX6-NEXT: buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64 offset:1
122 ; GFX6-NEXT: buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:3
123 ; GFX6-NEXT: buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2
124 ; GFX6-NEXT: buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:5
125 ; GFX6-NEXT: buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:7
126 ; GFX6-NEXT: buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:6
127 ; GFX6-NEXT: buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:9
128 ; GFX6-NEXT: buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:11
129 ; GFX6-NEXT: buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:10
130 ; GFX6-NEXT: buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64
131 ; GFX6-NEXT: buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:4
132 ; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8
133 ; GFX6-NEXT: s_waitcnt vmcnt(11)
134 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v2
135 ; GFX6-NEXT: s_waitcnt vmcnt(10)
136 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3
137 ; GFX6-NEXT: s_waitcnt vmcnt(9)
138 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v4
139 ; GFX6-NEXT: s_waitcnt vmcnt(8)
140 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 8, v5
141 ; GFX6-NEXT: s_waitcnt vmcnt(7)
142 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 24, v6
143 ; GFX6-NEXT: s_waitcnt vmcnt(6)
144 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v7
145 ; GFX6-NEXT: s_waitcnt vmcnt(5)
146 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 8, v8
147 ; GFX6-NEXT: s_waitcnt vmcnt(4)
148 ; GFX6-NEXT: v_lshlrev_b32_e32 v8, 24, v9
149 ; GFX6-NEXT: s_waitcnt vmcnt(3)
150 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v10
151 ; GFX6-NEXT: s_waitcnt vmcnt(2)
152 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v11
153 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
154 ; GFX6-NEXT: s_waitcnt vmcnt(1)
155 ; GFX6-NEXT: v_or_b32_e32 v3, v4, v12
156 ; GFX6-NEXT: v_or_b32_e32 v4, v5, v6
157 ; GFX6-NEXT: s_waitcnt vmcnt(0)
158 ; GFX6-NEXT: v_or_b32_e32 v5, v7, v0
159 ; GFX6-NEXT: v_or_b32_e32 v6, v8, v9
160 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v1
161 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v3
162 ; GFX6-NEXT: v_or_b32_e32 v2, v6, v5
163 ; GFX6-NEXT: s_setpc_b64 s[30:31]
164 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1
; Test: load of <3 x i32> from ptr addrspace(4) with align 2, pointer
; addressed via v[0:1] in the expected output.
; Expected codegen, per the checks below:
;   - gfx900 / hawaii with +unaligned-access-mode: one dwordx3 load.
;   - gfx900 / hawaii with -unaligned-access-mode, and tahiti: six ushort
;     loads (offsets 0,2,..,10); each pair is merged into a dword with a
;     16-bit shift and an OR.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
168 define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) {
169 ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
170 ; GFX9-UNALIGNED: ; %bb.0:
171 ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172 ; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
173 ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
174 ; GFX9-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
176 ; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
177 ; GFX9-NOUNALIGNED: ; %bb.0:
178 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[0:1], off
180 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v[0:1], off offset:2
181 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[0:1], off offset:4
182 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[0:1], off offset:6
183 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[0:1], off offset:8
184 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v7, v[0:1], off offset:10
185 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
186 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2
187 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
188 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4
189 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
190 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6
191 ; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
193 ; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align2:
194 ; GFX7-UNALIGNED: ; %bb.0:
195 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
196 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s6, 0
197 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s7, 0xf000
198 ; GFX7-UNALIGNED-NEXT: s_mov_b64 s[4:5], 0
199 ; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
200 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
201 ; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31]
203 ; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2:
204 ; GFX7-NOUNALIGNED: ; %bb.0:
205 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0
207 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000
208 ; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0
209 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2
210 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:6
211 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:10
212 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64
213 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:4
214 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v7, v[0:1], s[4:7], 0 addr64 offset:8
215 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
216 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v2
217 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
218 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v3
219 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3)
220 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v4
221 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
222 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v5
223 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
224 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v6
225 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
226 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v7
227 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31]
229 ; GFX6-LABEL: v_load_constant_v3i32_align2:
231 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GFX6-NEXT: s_mov_b32 s6, 0
233 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
234 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
235 ; GFX6-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2
236 ; GFX6-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:6
237 ; GFX6-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:10
238 ; GFX6-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64
239 ; GFX6-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:4
240 ; GFX6-NEXT: buffer_load_ushort v7, v[0:1], s[4:7], 0 addr64 offset:8
241 ; GFX6-NEXT: s_waitcnt vmcnt(5)
242 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v2
243 ; GFX6-NEXT: s_waitcnt vmcnt(4)
244 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3
245 ; GFX6-NEXT: s_waitcnt vmcnt(3)
246 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v4
247 ; GFX6-NEXT: s_waitcnt vmcnt(2)
248 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v5
249 ; GFX6-NEXT: s_waitcnt vmcnt(1)
250 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v6
251 ; GFX6-NEXT: s_waitcnt vmcnt(0)
252 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v7
253 ; GFX6-NEXT: s_setpc_b64 s[30:31]
254 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2
; Test: load of <3 x i32> from ptr addrspace(4) with align 4, pointer
; addressed via v[0:1] in the expected output.
; The gfx900 and hawaii checks use the prefix shared by both
; unaligned-access-mode settings, so one dwordx3 load is expected either way.
; tahiti is expected to split the access into a dwordx2 load plus a dword
; load at offset 8, followed by moves into v0/v1.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
258 define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) {
259 ; GFX9-LABEL: v_load_constant_v3i32_align4:
261 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
263 ; GFX9-NEXT: s_waitcnt vmcnt(0)
264 ; GFX9-NEXT: s_setpc_b64 s[30:31]
266 ; GFX7-LABEL: v_load_constant_v3i32_align4:
268 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269 ; GFX7-NEXT: s_mov_b32 s6, 0
270 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
271 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
272 ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
273 ; GFX7-NEXT: s_waitcnt vmcnt(0)
274 ; GFX7-NEXT: s_setpc_b64 s[30:31]
276 ; GFX6-LABEL: v_load_constant_v3i32_align4:
278 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279 ; GFX6-NEXT: s_mov_b32 s6, 0
280 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
281 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
282 ; GFX6-NEXT: buffer_load_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64
283 ; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 offset:8
284 ; GFX6-NEXT: s_waitcnt vmcnt(1)
285 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
286 ; GFX6-NEXT: v_mov_b32_e32 v1, v4
287 ; GFX6-NEXT: s_waitcnt vmcnt(0)
288 ; GFX6-NEXT: s_setpc_b64 s[30:31]
289 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4
; Test: load of a scalar i96 from ptr addrspace(4) with align 8, pointer
; addressed via v[0:1] in the expected output.
; gfx900 and hawaii (both unaligned-access-mode settings share the prefix)
; expect one dwordx3 load; tahiti expects a dwordx2 load plus a dword load at
; offset 8, followed by moves into v0/v1.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
293 define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) {
294 ; GFX9-LABEL: v_load_constant_i96_align8:
296 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297 ; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
298 ; GFX9-NEXT: s_waitcnt vmcnt(0)
299 ; GFX9-NEXT: s_setpc_b64 s[30:31]
301 ; GFX7-LABEL: v_load_constant_i96_align8:
303 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304 ; GFX7-NEXT: s_mov_b32 s6, 0
305 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
306 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
307 ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
308 ; GFX7-NEXT: s_waitcnt vmcnt(0)
309 ; GFX7-NEXT: s_setpc_b64 s[30:31]
311 ; GFX6-LABEL: v_load_constant_i96_align8:
313 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX6-NEXT: s_mov_b32 s6, 0
315 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
316 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
317 ; GFX6-NEXT: buffer_load_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64
318 ; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 offset:8
319 ; GFX6-NEXT: s_waitcnt vmcnt(1)
320 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
321 ; GFX6-NEXT: v_mov_b32_e32 v1, v4
322 ; GFX6-NEXT: s_waitcnt vmcnt(0)
323 ; GFX6-NEXT: s_setpc_b64 s[30:31]
324 %load = load i96, ptr addrspace(4) %ptr, align 8
; Test: load of <3 x i32> from ptr addrspace(4) with align 8, pointer
; addressed via v[0:1] in the expected output.
; gfx900 and hawaii (both unaligned-access-mode settings share the prefix)
; expect one dwordx3 load; tahiti expects a dwordx2 load plus a dword load at
; offset 8, followed by moves into v0/v1.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
328 define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) {
329 ; GFX9-LABEL: v_load_constant_v3i32_align8:
331 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332 ; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
333 ; GFX9-NEXT: s_waitcnt vmcnt(0)
334 ; GFX9-NEXT: s_setpc_b64 s[30:31]
336 ; GFX7-LABEL: v_load_constant_v3i32_align8:
338 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX7-NEXT: s_mov_b32 s6, 0
340 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
341 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
342 ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
343 ; GFX7-NEXT: s_waitcnt vmcnt(0)
344 ; GFX7-NEXT: s_setpc_b64 s[30:31]
346 ; GFX6-LABEL: v_load_constant_v3i32_align8:
348 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349 ; GFX6-NEXT: s_mov_b32 s6, 0
350 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
351 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
352 ; GFX6-NEXT: buffer_load_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64
353 ; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 offset:8
354 ; GFX6-NEXT: s_waitcnt vmcnt(1)
355 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
356 ; GFX6-NEXT: v_mov_b32_e32 v1, v4
357 ; GFX6-NEXT: s_waitcnt vmcnt(0)
358 ; GFX6-NEXT: s_setpc_b64 s[30:31]
359 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8
; Test: load of <6 x i16> from ptr addrspace(4) with align 8, pointer
; addressed via v[0:1] in the expected output.
; Expected codegen, per the checks below:
;   - gfx900: one dwordx3 load straight into v[0:2], with no further
;     instructions before the return.
;   - hawaii: one dwordx3 load into v[6:8]; 16-bit right shifts produce v1,
;     v3, v5 and moves produce v0, v2, v4.
;   - tahiti: a dwordx2 load plus a dword load at offset 8, then the same
;     shift/move pattern.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
363 define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) {
364 ; GFX9-LABEL: v_load_constant_v6i16_align8:
366 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367 ; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
368 ; GFX9-NEXT: s_waitcnt vmcnt(0)
369 ; GFX9-NEXT: s_setpc_b64 s[30:31]
371 ; GFX7-LABEL: v_load_constant_v6i16_align8:
373 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX7-NEXT: s_mov_b32 s6, 0
375 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
376 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
377 ; GFX7-NEXT: buffer_load_dwordx3 v[6:8], v[0:1], s[4:7], 0 addr64
378 ; GFX7-NEXT: s_waitcnt vmcnt(0)
379 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v6
380 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v7
381 ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v8
382 ; GFX7-NEXT: v_mov_b32_e32 v0, v6
383 ; GFX7-NEXT: v_mov_b32_e32 v2, v7
384 ; GFX7-NEXT: v_mov_b32_e32 v4, v8
385 ; GFX7-NEXT: s_setpc_b64 s[30:31]
387 ; GFX6-LABEL: v_load_constant_v6i16_align8:
389 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390 ; GFX6-NEXT: s_mov_b32 s6, 0
391 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
392 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
393 ; GFX6-NEXT: buffer_load_dwordx2 v[6:7], v[0:1], s[4:7], 0 addr64
394 ; GFX6-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8
395 ; GFX6-NEXT: s_waitcnt vmcnt(1)
396 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v6
397 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v7
398 ; GFX6-NEXT: s_waitcnt vmcnt(0)
399 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4
400 ; GFX6-NEXT: v_mov_b32_e32 v0, v6
401 ; GFX6-NEXT: v_mov_b32_e32 v2, v7
402 ; GFX6-NEXT: s_setpc_b64 s[30:31]
403 %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8
; Test: load of <12 x i8> from ptr addrspace(4) with align 8, pointer
; addressed via v[0:1] in the expected output.
; Expected codegen, per the checks below:
;   - gfx900 / hawaii: one dwordx3 load, then right shifts by 8/16/24 and
;     moves that leave results in v0..v11 (v12/v13 are used as temporaries
;     for the shifted values later moved into v1/v2).
;   - tahiti: a dwordx2 load plus a dword load at offset 8, followed by the
;     same shift-by-8/16/24 pattern.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
407 define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) {
408 ; GFX9-LABEL: v_load_constant_v12i8_align8:
410 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
411 ; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
412 ; GFX9-NEXT: s_waitcnt vmcnt(0)
413 ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v0
414 ; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v0
415 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0
416 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1
417 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1
418 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1
419 ; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2
420 ; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2
421 ; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2
422 ; GFX9-NEXT: v_mov_b32_e32 v4, v1
423 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
424 ; GFX9-NEXT: v_mov_b32_e32 v1, v13
425 ; GFX9-NEXT: v_mov_b32_e32 v2, v12
426 ; GFX9-NEXT: s_setpc_b64 s[30:31]
428 ; GFX7-LABEL: v_load_constant_v12i8_align8:
430 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX7-NEXT: s_mov_b32 s6, 0
432 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
433 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
434 ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
435 ; GFX7-NEXT: s_waitcnt vmcnt(0)
436 ; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v0
437 ; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v0
438 ; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0
439 ; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1
440 ; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1
441 ; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1
442 ; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v2
443 ; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v2
444 ; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v2
445 ; GFX7-NEXT: v_mov_b32_e32 v4, v1
446 ; GFX7-NEXT: v_mov_b32_e32 v8, v2
447 ; GFX7-NEXT: v_mov_b32_e32 v1, v13
448 ; GFX7-NEXT: v_mov_b32_e32 v2, v12
449 ; GFX7-NEXT: s_setpc_b64 s[30:31]
451 ; GFX6-LABEL: v_load_constant_v12i8_align8:
453 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX6-NEXT: s_mov_b32 s6, 0
455 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
456 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
457 ; GFX6-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64
458 ; GFX6-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:8
459 ; GFX6-NEXT: s_waitcnt vmcnt(1)
460 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 8, v12
461 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v12
462 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v12
463 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v13
464 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v13
465 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v13
466 ; GFX6-NEXT: s_waitcnt vmcnt(0)
467 ; GFX6-NEXT: v_lshrrev_b32_e32 v9, 8, v8
468 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8
469 ; GFX6-NEXT: v_lshrrev_b32_e32 v11, 24, v8
470 ; GFX6-NEXT: v_mov_b32_e32 v0, v12
471 ; GFX6-NEXT: v_mov_b32_e32 v4, v13
472 ; GFX6-NEXT: s_setpc_b64 s[30:31]
473 %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8
; Test: load of <3 x i32> from ptr addrspace(4) with align 16, pointer
; addressed via v[0:1] in the expected output.
; gfx900 and hawaii (both unaligned-access-mode settings share the prefix)
; expect one dwordx3 load. tahiti is expected to use a single dwordx4 load
; into v[0:3] instead of the dwordx2 + dword split seen at lower alignments.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
477 define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) {
478 ; GFX9-LABEL: v_load_constant_v3i32_align16:
480 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off
482 ; GFX9-NEXT: s_waitcnt vmcnt(0)
483 ; GFX9-NEXT: s_setpc_b64 s[30:31]
485 ; GFX7-LABEL: v_load_constant_v3i32_align16:
487 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488 ; GFX7-NEXT: s_mov_b32 s6, 0
489 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
490 ; GFX7-NEXT: s_mov_b64 s[4:5], 0
491 ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64
492 ; GFX7-NEXT: s_waitcnt vmcnt(0)
493 ; GFX7-NEXT: s_setpc_b64 s[30:31]
495 ; GFX6-LABEL: v_load_constant_v3i32_align16:
497 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX6-NEXT: s_mov_b32 s6, 0
499 ; GFX6-NEXT: s_mov_b32 s7, 0xf000
500 ; GFX6-NEXT: s_mov_b64 s[4:5], 0
501 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64
502 ; GFX6-NEXT: s_waitcnt vmcnt(0)
503 ; GFX6-NEXT: s_setpc_b64 s[30:31]
504 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 16
; Test: amdgpu_ps function loading <3 x i32> from an inreg ptr addrspace(4)
; with align 1. In the expected output the pointer is in s[0:1] and the
; result is moved to s0..s2 with v_readfirstlane_b32.
; Expected codegen, per the checks below:
;   - gfx900 / hawaii with +unaligned-access-mode: one dwordx3 vector-memory
;     load followed by three v_readfirstlane_b32.
;   - gfx900 / hawaii with -unaligned-access-mode, and tahiti: twelve ubyte
;     loads (offsets 0..11) recombined with shifts and ORs, then three
;     v_readfirstlane_b32.
; Check lines are autogenerated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.
508 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg %ptr) {
509 ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
510 ; GFX9-UNALIGNED: ; %bb.0:
511 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
512 ; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1]
513 ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
514 ; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
515 ; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
516 ; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
517 ; GFX9-UNALIGNED-NEXT: ; return to shader part epilog
519 ; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
520 ; GFX9-NOUNALIGNED: ; %bb.0:
521 ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0
522 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v0, s[0:1]
523 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v0, s[0:1] offset:1
524 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v0, s[0:1] offset:2
525 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v0, s[0:1] offset:3
526 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v0, s[0:1] offset:4
527 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v0, s[0:1] offset:5
528 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v0, s[0:1] offset:6
529 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v0, s[0:1] offset:7
530 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v0, s[0:1] offset:8
531 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v0, s[0:1] offset:9
532 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v0, s[0:1] offset:11
533 ; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v0, s[0:1] offset:10
534 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10)
535 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v2, 8, v1
536 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9)
537 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
538 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8)
539 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 24, v4
540 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v0
541 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6)
542 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v3, v6, 8, v5
543 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
544 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v7
545 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
546 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v8
547 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v3
548 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
549 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v6, v10, 8, v9
550 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
551 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v11
552 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
553 ; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v12
554 ; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v6
555 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
556 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
557 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
558 ; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog
560 ; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1:
561 ; GFX7-UNALIGNED: ; %bb.0:
562 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s2, -1
563 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s3, 0xf000
564 ; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], off, s[0:3], 0
565 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
566 ; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
567 ; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
568 ; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
569 ; GFX7-UNALIGNED-NEXT: ; return to shader part epilog
571 ; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1:
572 ; GFX7-NOUNALIGNED: ; %bb.0:
573 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1
574 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000
575 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1
576 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:3
577 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:2
578 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 offset:5
579 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, off, s[0:3], 0 offset:7
580 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, off, s[0:3], 0 offset:6
581 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, off, s[0:3], 0 offset:9
582 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, off, s[0:3], 0 offset:11
583 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, off, s[0:3], 0 offset:10
584 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, off, s[0:3], 0
585 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, off, s[0:3], 0 offset:4
586 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, off, s[0:3], 0 offset:8
587 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11)
588 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 8, v0
589 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10)
590 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 24, v1
591 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9)
592 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2
593 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8)
594 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v3
595 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7)
596 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v4
597 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6)
598 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v5
599 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
600 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 8, v6
601 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
602 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v7
603 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3)
604 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v8
605 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
606 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v9
607 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v2
608 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
609 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v3, v10
610 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v3, v4, v5
611 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
612 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v6, v11
613 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v5, v7, v8
614 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v1, v0
615 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v3, v2
616 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v5, v4
617 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
618 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
619 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
620 ; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog
622 ; GFX6-LABEL: s_load_constant_v3i32_align1:
624 ; GFX6-NEXT: s_mov_b32 s2, -1
625 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
626 ; GFX6-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1
627 ; GFX6-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:3
628 ; GFX6-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:2
629 ; GFX6-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 offset:5
630 ; GFX6-NEXT: buffer_load_ubyte v4, off, s[0:3], 0 offset:7
631 ; GFX6-NEXT: buffer_load_ubyte v5, off, s[0:3], 0 offset:6
632 ; GFX6-NEXT: buffer_load_ubyte v6, off, s[0:3], 0 offset:9
633 ; GFX6-NEXT: buffer_load_ubyte v7, off, s[0:3], 0 offset:11
634 ; GFX6-NEXT: buffer_load_ubyte v8, off, s[0:3], 0 offset:10
635 ; GFX6-NEXT: buffer_load_ubyte v9, off, s[0:3], 0
636 ; GFX6-NEXT: buffer_load_ubyte v10, off, s[0:3], 0 offset:4
637 ; GFX6-NEXT: buffer_load_ubyte v11, off, s[0:3], 0 offset:8
638 ; GFX6-NEXT: s_waitcnt vmcnt(11)
639 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
640 ; GFX6-NEXT: s_waitcnt vmcnt(10)
641 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
642 ; GFX6-NEXT: s_waitcnt vmcnt(9)
643 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
644 ; GFX6-NEXT: s_waitcnt vmcnt(8)
645 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
646 ; GFX6-NEXT: s_waitcnt vmcnt(7)
647 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4
648 ; GFX6-NEXT: s_waitcnt vmcnt(6)
649 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
650 ; GFX6-NEXT: s_waitcnt vmcnt(5)
651 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, 8, v6
652 ; GFX6-NEXT: s_waitcnt vmcnt(4)
653 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 24, v7
654 ; GFX6-NEXT: s_waitcnt vmcnt(3)
655 ; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v8
656 ; GFX6-NEXT: s_waitcnt vmcnt(2)
657 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v9
658 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
659 ; GFX6-NEXT: s_waitcnt vmcnt(1)
660 ; GFX6-NEXT: v_or_b32_e32 v2, v3, v10
661 ; GFX6-NEXT: v_or_b32_e32 v3, v4, v5
662 ; GFX6-NEXT: s_waitcnt vmcnt(0)
663 ; GFX6-NEXT: v_or_b32_e32 v4, v6, v11
664 ; GFX6-NEXT: v_or_b32_e32 v5, v7, v8
665 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
666 ; GFX6-NEXT: v_or_b32_e32 v1, v3, v2
667 ; GFX6-NEXT: v_or_b32_e32 v2, v5, v4
668 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
669 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1
670 ; GFX6-NEXT: v_readfirstlane_b32 s2, v2
671 ; GFX6-NEXT: ; return to shader part epilog
672 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1
; amdgpu_ps test: load <3 x i32> through an inreg addrspace(4) pointer with
; only 2-byte alignment. Per the autogenerated checks below:
;   - the GFX9-UNALIGNED and GFX7-UNALIGNED runs (+unaligned-access-mode)
;     expect a single global_load_dwordx3 / buffer_load_dwordx3;
;   - the GFX9-NOUNALIGNED, GFX7-NOUNALIGNED and GFX6 runs expect six 16-bit
;     loads (offsets 0..10) that are recombined pairwise into three dwords
;     (v_lshl_or_b32 on GFX9, v_lshlrev_b32 + v_or_b32 on GFX7/GFX6).
; In every variant the three result dwords are copied into s0-s2 with
; v_readfirstlane_b32 before the shader-part epilog.
676 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg %ptr) {
677 ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
678 ; GFX9-UNALIGNED: ; %bb.0:
679 ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0
680 ; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1]
681 ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
682 ; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
683 ; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
684 ; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
685 ; GFX9-UNALIGNED-NEXT: ; return to shader part epilog
687 ; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
688 ; GFX9-NOUNALIGNED: ; %bb.0:
689 ; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0
690 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v0, s[0:1]
691 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v0, s[0:1] offset:2
692 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v0, s[0:1] offset:4
693 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v0, s[0:1] offset:6
694 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v0, s[0:1] offset:8
695 ; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v0, s[0:1] offset:10
696 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
697 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v2, 16, v1
698 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
699 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
700 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v4, 16, v3
701 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
702 ; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
703 ; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v6, 16, v5
704 ; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
705 ; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog
707 ; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align2:
708 ; GFX7-UNALIGNED: ; %bb.0:
709 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s2, -1
710 ; GFX7-UNALIGNED-NEXT: s_mov_b32 s3, 0xf000
711 ; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], off, s[0:3], 0
712 ; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0)
713 ; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
714 ; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
715 ; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
716 ; GFX7-UNALIGNED-NEXT: ; return to shader part epilog
718 ; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2:
719 ; GFX7-NOUNALIGNED: ; %bb.0:
720 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1
721 ; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000
722 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2
723 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:6
724 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:10
725 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, off, s[0:3], 0
726 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4
727 ; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8
728 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5)
729 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0
730 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4)
731 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1
732 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3)
733 ; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2
734 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2)
735 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v3
736 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1)
737 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v4
738 ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0)
739 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v5
740 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0
741 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1
742 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2
743 ; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog
745 ; GFX6-LABEL: s_load_constant_v3i32_align2:
747 ; GFX6-NEXT: s_mov_b32 s2, -1
748 ; GFX6-NEXT: s_mov_b32 s3, 0xf000
749 ; GFX6-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2
750 ; GFX6-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:6
751 ; GFX6-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:10
752 ; GFX6-NEXT: buffer_load_ushort v3, off, s[0:3], 0
753 ; GFX6-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4
754 ; GFX6-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8
755 ; GFX6-NEXT: s_waitcnt vmcnt(5)
756 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
757 ; GFX6-NEXT: s_waitcnt vmcnt(4)
758 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
759 ; GFX6-NEXT: s_waitcnt vmcnt(3)
760 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
761 ; GFX6-NEXT: s_waitcnt vmcnt(2)
762 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
763 ; GFX6-NEXT: s_waitcnt vmcnt(1)
764 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
765 ; GFX6-NEXT: s_waitcnt vmcnt(0)
766 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v5
767 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
768 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1
769 ; GFX6-NEXT: v_readfirstlane_b32 s2, v2
770 ; GFX6-NEXT: ; return to shader part epilog
771 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2
; amdgpu_ps test: load <3 x i32> through an inreg addrspace(4) pointer with
; 4-byte alignment. The GFX9, GFX7 and GFX6 checks all expect two scalar
; loads: s_load_dwordx2 into s[4:5] for the first two elements and
; s_load_dword straight into s2 for the third, followed by s_mov_b32 copies
; of s4/s5 into s0/s1.
; The third-element offset is printed as 0x8 in the GFX9 checks and as 0x2 in
; the GFX7/GFX6 checks.
; NOTE(review): presumably a byte offset vs. a dword offset encoding - confirm.
775 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) {
776 ; GFX9-LABEL: s_load_constant_v3i32_align4:
778 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
779 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
780 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
781 ; GFX9-NEXT: s_mov_b32 s0, s4
782 ; GFX9-NEXT: s_mov_b32 s1, s5
783 ; GFX9-NEXT: ; return to shader part epilog
785 ; GFX7-LABEL: s_load_constant_v3i32_align4:
787 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
788 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
789 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
790 ; GFX7-NEXT: s_mov_b32 s0, s4
791 ; GFX7-NEXT: s_mov_b32 s1, s5
792 ; GFX7-NEXT: ; return to shader part epilog
794 ; GFX6-LABEL: s_load_constant_v3i32_align4:
796 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
797 ; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2
798 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
799 ; GFX6-NEXT: s_mov_b32 s0, s4
800 ; GFX6-NEXT: s_mov_b32 s1, s5
801 ; GFX6-NEXT: ; return to shader part epilog
802 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4
; amdgpu_ps test: load a scalar i96 (rather than a vector) through an inreg
; addrspace(4) pointer with 8-byte alignment. The GFX9, GFX7 and GFX6 checks
; all expect two scalar loads: s_load_dwordx2 into s[4:5] for the low 64 bits
; and s_load_dword into s2 for the remaining 32 bits, then s_mov_b32 copies
; of s4/s5 into s0/s1.
; The second load's offset is printed as 0x8 in the GFX9 checks and as 0x2 in
; the GFX7/GFX6 checks.
806 define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) {
807 ; GFX9-LABEL: s_load_constant_i96_align8:
809 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
810 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
811 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
812 ; GFX9-NEXT: s_mov_b32 s0, s4
813 ; GFX9-NEXT: s_mov_b32 s1, s5
814 ; GFX9-NEXT: ; return to shader part epilog
816 ; GFX7-LABEL: s_load_constant_i96_align8:
818 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
819 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
820 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
821 ; GFX7-NEXT: s_mov_b32 s0, s4
822 ; GFX7-NEXT: s_mov_b32 s1, s5
823 ; GFX7-NEXT: ; return to shader part epilog
825 ; GFX6-LABEL: s_load_constant_i96_align8:
827 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
828 ; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2
829 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
830 ; GFX6-NEXT: s_mov_b32 s0, s4
831 ; GFX6-NEXT: s_mov_b32 s1, s5
832 ; GFX6-NEXT: ; return to shader part epilog
833 %load = load i96, ptr addrspace(4) %ptr, align 8
; amdgpu_ps test: load <3 x i32> through an inreg addrspace(4) pointer with
; 8-byte alignment. The GFX9, GFX7 and GFX6 checks all expect
; s_load_dwordx2 into s[4:5] plus s_load_dword into s2, followed by s_mov_b32
; copies of s4/s5 into s0/s1 - the same instruction sequence the checks for
; the align-4 <3 x i32> test in this file expect.
837 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) {
838 ; GFX9-LABEL: s_load_constant_v3i32_align8:
840 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
841 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
842 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
843 ; GFX9-NEXT: s_mov_b32 s0, s4
844 ; GFX9-NEXT: s_mov_b32 s1, s5
845 ; GFX9-NEXT: ; return to shader part epilog
847 ; GFX7-LABEL: s_load_constant_v3i32_align8:
849 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
850 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
851 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
852 ; GFX7-NEXT: s_mov_b32 s0, s4
853 ; GFX7-NEXT: s_mov_b32 s1, s5
854 ; GFX7-NEXT: ; return to shader part epilog
856 ; GFX6-LABEL: s_load_constant_v3i32_align8:
858 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
859 ; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2
860 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
861 ; GFX6-NEXT: s_mov_b32 s0, s4
862 ; GFX6-NEXT: s_mov_b32 s1, s5
863 ; GFX6-NEXT: ; return to shader part epilog
864 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8
; amdgpu_ps test: load <6 x i16> through an inreg addrspace(4) pointer with
; 8-byte alignment, then bitcast the value to <3 x i32> (the declared return
; type). The GFX9, GFX7 and GFX6 checks expect no 16-bit handling at all:
; just s_load_dwordx2 into s[4:5] plus s_load_dword into s2, followed by
; s_mov_b32 copies of s4/s5 into s0/s1.
868 define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) {
869 ; GFX9-LABEL: s_load_constant_v6i16_align8:
871 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
872 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8
873 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
874 ; GFX9-NEXT: s_mov_b32 s0, s4
875 ; GFX9-NEXT: s_mov_b32 s1, s5
876 ; GFX9-NEXT: ; return to shader part epilog
878 ; GFX7-LABEL: s_load_constant_v6i16_align8:
880 ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
881 ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2
882 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
883 ; GFX7-NEXT: s_mov_b32 s0, s4
884 ; GFX7-NEXT: s_mov_b32 s1, s5
885 ; GFX7-NEXT: ; return to shader part epilog
887 ; GFX6-LABEL: s_load_constant_v6i16_align8:
889 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
890 ; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2
891 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
892 ; GFX6-NEXT: s_mov_b32 s0, s4
893 ; GFX6-NEXT: s_mov_b32 s1, s5
894 ; GFX6-NEXT: ; return to shader part epilog
895 %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8
896 %cast = bitcast <6 x i16> %load to <3 x i32>
; amdgpu_ps test: load <12 x i8> through an inreg addrspace(4) pointer with
; 8-byte alignment and return it as <12 x i8>. The GFX9, GFX7 and GFX6 checks
; expect s_load_dwordx2 into s[12:13] plus s_load_dword into s8, after which
; every loaded dword is unpacked with s_lshr_b32 by 8, 16 and 24:
;   s12 -> s1, s2, s3     (s0 receives a plain copy of s12)
;   s13 -> s5, s6, s7     (s4 receives a plain copy of s13)
;   s8  -> s9, s10, s11   (s8 itself is left as loaded)
; No masking of the upper bits is expected in s0/s4/s8 or in the shifted
; copies.
; NOTE(review): this looks like one SGPR (s0-s11) per returned i8 element -
; confirm against the amdgpu_ps return convention.
900 define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) {
901 ; GFX9-LABEL: s_load_constant_v12i8_align8:
903 ; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
904 ; GFX9-NEXT: s_load_dword s8, s[0:1], 0x8
905 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
906 ; GFX9-NEXT: s_lshr_b32 s1, s12, 8
907 ; GFX9-NEXT: s_lshr_b32 s2, s12, 16
908 ; GFX9-NEXT: s_lshr_b32 s3, s12, 24
909 ; GFX9-NEXT: s_lshr_b32 s5, s13, 8
910 ; GFX9-NEXT: s_lshr_b32 s6, s13, 16
911 ; GFX9-NEXT: s_lshr_b32 s7, s13, 24
912 ; GFX9-NEXT: s_lshr_b32 s9, s8, 8
913 ; GFX9-NEXT: s_lshr_b32 s10, s8, 16
914 ; GFX9-NEXT: s_lshr_b32 s11, s8, 24
915 ; GFX9-NEXT: s_mov_b32 s0, s12
916 ; GFX9-NEXT: s_mov_b32 s4, s13
917 ; GFX9-NEXT: ; return to shader part epilog
919 ; GFX7-LABEL: s_load_constant_v12i8_align8:
921 ; GFX7-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
922 ; GFX7-NEXT: s_load_dword s8, s[0:1], 0x2
923 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
924 ; GFX7-NEXT: s_lshr_b32 s1, s12, 8
925 ; GFX7-NEXT: s_lshr_b32 s2, s12, 16
926 ; GFX7-NEXT: s_lshr_b32 s3, s12, 24
927 ; GFX7-NEXT: s_lshr_b32 s5, s13, 8
928 ; GFX7-NEXT: s_lshr_b32 s6, s13, 16
929 ; GFX7-NEXT: s_lshr_b32 s7, s13, 24
930 ; GFX7-NEXT: s_lshr_b32 s9, s8, 8
931 ; GFX7-NEXT: s_lshr_b32 s10, s8, 16
932 ; GFX7-NEXT: s_lshr_b32 s11, s8, 24
933 ; GFX7-NEXT: s_mov_b32 s0, s12
934 ; GFX7-NEXT: s_mov_b32 s4, s13
935 ; GFX7-NEXT: ; return to shader part epilog
937 ; GFX6-LABEL: s_load_constant_v12i8_align8:
939 ; GFX6-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0
940 ; GFX6-NEXT: s_load_dword s8, s[0:1], 0x2
941 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
942 ; GFX6-NEXT: s_lshr_b32 s1, s12, 8
943 ; GFX6-NEXT: s_lshr_b32 s2, s12, 16
944 ; GFX6-NEXT: s_lshr_b32 s3, s12, 24
945 ; GFX6-NEXT: s_lshr_b32 s5, s13, 8
946 ; GFX6-NEXT: s_lshr_b32 s6, s13, 16
947 ; GFX6-NEXT: s_lshr_b32 s7, s13, 24
948 ; GFX6-NEXT: s_lshr_b32 s9, s8, 8
949 ; GFX6-NEXT: s_lshr_b32 s10, s8, 16
950 ; GFX6-NEXT: s_lshr_b32 s11, s8, 24
951 ; GFX6-NEXT: s_mov_b32 s0, s12
952 ; GFX6-NEXT: s_mov_b32 s4, s13
953 ; GFX6-NEXT: ; return to shader part epilog
954 %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8
; amdgpu_ps test: load <3 x i32> through an inreg addrspace(4) pointer with
; 16-byte alignment. The checks use the shared GCN prefix, so every run line
; expects the same output: a single s_load_dwordx4 into s[0:3] (one dword
; more than the three-element result needs) with no follow-up register
; copies.
; NOTE(review): presumably the 16-byte alignment is what allows the widened
; four-dword load - confirm.
958 define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) {
959 ; GCN-LABEL: s_load_constant_v3i32_align16:
961 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
962 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
963 ; GCN-NEXT: ; return to shader part epilog
964 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 16