1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s
5 define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind {
6 ; HSA-VI-LABEL: name: i8_arg
7 ; HSA-VI: bb.1 (%ir-block.0):
8 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
10 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
11 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
12 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
13 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
14 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
15 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
16 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
17 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
18 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
19 ; HSA-VI-NEXT: S_ENDPGM 0
20 ; LEGACY-MESA-VI-LABEL: name: i8_arg
21 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
22 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
23 ; LEGACY-MESA-VI-NEXT: {{ $}}
24 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
25 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
26 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
27 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
28 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
29 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
30 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
31 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
32 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
33 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
34 %ext = zext i8 %in to i32
35 store i32 %ext, ptr addrspace(1) %out, align 4
39 define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroext %in) nounwind {
40 ; HSA-VI-LABEL: name: i8_zext_arg
41 ; HSA-VI: bb.1 (%ir-block.0):
42 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
44 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
45 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
46 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
47 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
48 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
49 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
50 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
51 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
52 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
53 ; HSA-VI-NEXT: S_ENDPGM 0
54 ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg
55 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
56 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
57 ; LEGACY-MESA-VI-NEXT: {{ $}}
58 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
59 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
60 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
61 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
62 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
63 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
64 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
65 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
66 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
67 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
68 %ext = zext i8 %in to i32
69 store i32 %ext, ptr addrspace(1) %out, align 4
73 define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signext %in) nounwind {
74 ; HSA-VI-LABEL: name: i8_sext_arg
75 ; HSA-VI: bb.1 (%ir-block.0):
76 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
78 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
79 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
80 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
81 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
82 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
83 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
84 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4)
85 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
86 ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
87 ; HSA-VI-NEXT: S_ENDPGM 0
88 ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg
89 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
90 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
91 ; LEGACY-MESA-VI-NEXT: {{ $}}
92 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
93 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
94 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
95 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
96 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
97 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
98 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
99 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
100 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
101 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
102 %ext = sext i8 %in to i32
103 store i32 %ext, ptr addrspace(1) %out, align 4
107 define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) nounwind {
108 ; HSA-VI-LABEL: name: i16_arg
109 ; HSA-VI: bb.1 (%ir-block.0):
110 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
111 ; HSA-VI-NEXT: {{ $}}
112 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
113 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
114 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
115 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
116 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
117 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
118 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
119 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
120 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
121 ; HSA-VI-NEXT: S_ENDPGM 0
122 ; LEGACY-MESA-VI-LABEL: name: i16_arg
123 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
124 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
125 ; LEGACY-MESA-VI-NEXT: {{ $}}
126 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
127 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
128 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
129 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
130 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
131 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
132 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
133 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
134 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
135 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
136 %ext = zext i16 %in to i32
137 store i32 %ext, ptr addrspace(1) %out, align 4
141 define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zeroext %in) nounwind {
142 ; HSA-VI-LABEL: name: i16_zext_arg
143 ; HSA-VI: bb.1 (%ir-block.0):
144 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
145 ; HSA-VI-NEXT: {{ $}}
146 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
147 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
148 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
149 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
150 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
151 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
152 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
153 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
154 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
155 ; HSA-VI-NEXT: S_ENDPGM 0
156 ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg
157 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
158 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
159 ; LEGACY-MESA-VI-NEXT: {{ $}}
160 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
161 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
162 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
163 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
164 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
165 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
166 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
167 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
168 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
169 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
170 %ext = zext i16 %in to i32
171 store i32 %ext, ptr addrspace(1) %out, align 4
175 define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 signext %in) nounwind {
176 ; HSA-VI-LABEL: name: i16_sext_arg
177 ; HSA-VI: bb.1 (%ir-block.0):
178 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
179 ; HSA-VI-NEXT: {{ $}}
180 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
181 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
182 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
183 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
184 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
185 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
186 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4)
187 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
188 ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
189 ; HSA-VI-NEXT: S_ENDPGM 0
190 ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg
191 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
192 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
193 ; LEGACY-MESA-VI-NEXT: {{ $}}
194 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
195 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
196 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
197 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
198 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
199 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
200 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4)
201 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16)
202 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
203 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
204 %ext = sext i16 %in to i32
205 store i32 %ext, ptr addrspace(1) %out, align 4
209 define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nounwind {
210 ; HSA-VI-LABEL: name: i32_arg
211 ; HSA-VI: bb.1.entry:
212 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
213 ; HSA-VI-NEXT: {{ $}}
214 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
215 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
216 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
217 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
218 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
219 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
220 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
221 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
222 ; HSA-VI-NEXT: S_ENDPGM 0
223 ; LEGACY-MESA-VI-LABEL: name: i32_arg
224 ; LEGACY-MESA-VI: bb.1.entry:
225 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
226 ; LEGACY-MESA-VI-NEXT: {{ $}}
227 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
228 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
229 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
230 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
231 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
232 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
233 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
234 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
235 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
237 store i32 %in, ptr addrspace(1) %out, align 4
241 define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) nounwind {
242 ; HSA-VI-LABEL: name: f32_arg
243 ; HSA-VI: bb.1.entry:
244 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
245 ; HSA-VI-NEXT: {{ $}}
246 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
247 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
248 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
249 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
250 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
251 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
252 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
253 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
254 ; HSA-VI-NEXT: S_ENDPGM 0
255 ; LEGACY-MESA-VI-LABEL: name: f32_arg
256 ; LEGACY-MESA-VI: bb.1.entry:
257 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
258 ; LEGACY-MESA-VI-NEXT: {{ $}}
259 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
260 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
261 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
262 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
263 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
264 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
265 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
266 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
267 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
269 store float %in, ptr addrspace(1) %out, align 4
273 define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) {
274 ; HSA-VI-LABEL: name: v2i8_arg
275 ; HSA-VI: bb.1.entry:
276 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
277 ; HSA-VI-NEXT: {{ $}}
278 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
279 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
280 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
281 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
282 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
283 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
284 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4)
285 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
286 ; HSA-VI-NEXT: S_ENDPGM 0
287 ; LEGACY-MESA-VI-LABEL: name: v2i8_arg
288 ; LEGACY-MESA-VI: bb.1.entry:
289 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
290 ; LEGACY-MESA-VI-NEXT: {{ $}}
291 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
292 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
293 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
294 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
295 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
296 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
297 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4)
298 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1)
299 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
301 store <2 x i8> %in, ptr addrspace(1) %out
305 define amdgpu_kernel void @v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) {
306 ; HSA-VI-LABEL: name: v2i16_arg
307 ; HSA-VI: bb.1.entry:
308 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
309 ; HSA-VI-NEXT: {{ $}}
310 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
311 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
312 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
313 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
314 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
315 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
316 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4)
317 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
318 ; HSA-VI-NEXT: S_ENDPGM 0
319 ; LEGACY-MESA-VI-LABEL: name: v2i16_arg
320 ; LEGACY-MESA-VI: bb.1.entry:
321 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
322 ; LEGACY-MESA-VI-NEXT: {{ $}}
323 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
324 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
325 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
326 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
327 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
328 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
329 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4)
330 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1)
331 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
333 store <2 x i16> %in, ptr addrspace(1) %out
337 define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> %in) nounwind {
338 ; HSA-VI-LABEL: name: v2i32_arg
339 ; HSA-VI: bb.1.entry:
340 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
341 ; HSA-VI-NEXT: {{ $}}
342 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
343 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
344 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
345 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
346 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
347 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
348 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
349 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
350 ; HSA-VI-NEXT: S_ENDPGM 0
351 ; LEGACY-MESA-VI-LABEL: name: v2i32_arg
352 ; LEGACY-MESA-VI: bb.1.entry:
353 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
354 ; LEGACY-MESA-VI-NEXT: {{ $}}
355 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
356 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
357 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
358 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
359 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
360 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
361 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
362 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
363 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
365 store <2 x i32> %in, ptr addrspace(1) %out, align 4
369 define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float> %in) nounwind {
370 ; HSA-VI-LABEL: name: v2f32_arg
371 ; HSA-VI: bb.1.entry:
372 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
373 ; HSA-VI-NEXT: {{ $}}
374 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
375 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
376 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
377 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
378 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
379 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
380 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4)
381 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
382 ; HSA-VI-NEXT: S_ENDPGM 0
383 ; LEGACY-MESA-VI-LABEL: name: v2f32_arg
384 ; LEGACY-MESA-VI: bb.1.entry:
385 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
386 ; LEGACY-MESA-VI-NEXT: {{ $}}
387 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
388 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
389 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
390 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
391 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
392 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
393 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
394 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1)
395 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
397 store <2 x float> %in, ptr addrspace(1) %out, align 4
401 define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind {
402 ; HSA-VI-LABEL: name: v3i8_arg
403 ; HSA-VI: bb.1.entry:
404 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
405 ; HSA-VI-NEXT: {{ $}}
406 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
407 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
408 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
409 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
410 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
411 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
412 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4)
413 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
414 ; HSA-VI-NEXT: S_ENDPGM 0
415 ; LEGACY-MESA-VI-LABEL: name: v3i8_arg
416 ; LEGACY-MESA-VI: bb.1.entry:
417 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
418 ; LEGACY-MESA-VI-NEXT: {{ $}}
419 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
420 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
421 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
422 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
423 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
424 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
425 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4)
426 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1)
427 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
429 store <3 x i8> %in, ptr addrspace(1) %out, align 4
433 define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind {
434 ; HSA-VI-LABEL: name: v3i16_arg
435 ; HSA-VI: bb.1.entry:
436 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
437 ; HSA-VI-NEXT: {{ $}}
438 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
439 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
440 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
441 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
442 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
443 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
444 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4)
445 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
446 ; HSA-VI-NEXT: S_ENDPGM 0
447 ; LEGACY-MESA-VI-LABEL: name: v3i16_arg
448 ; LEGACY-MESA-VI: bb.1.entry:
449 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
450 ; LEGACY-MESA-VI-NEXT: {{ $}}
451 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
452 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
453 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
454 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
455 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
456 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
457 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4)
458 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1)
459 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
461 store <3 x i16> %in, ptr addrspace(1) %out, align 4
465 define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind {
466 ; HSA-VI-LABEL: name: v3i32_arg
467 ; HSA-VI: bb.1.entry:
468 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
469 ; HSA-VI-NEXT: {{ $}}
470 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
471 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
472 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
473 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
474 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
475 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
476 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
477 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
478 ; HSA-VI-NEXT: S_ENDPGM 0
479 ; LEGACY-MESA-VI-LABEL: name: v3i32_arg
480 ; LEGACY-MESA-VI: bb.1.entry:
481 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
482 ; LEGACY-MESA-VI-NEXT: {{ $}}
483 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
484 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
485 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
486 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
487 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
488 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
489 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
490 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
491 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
493 store <3 x i32> %in, ptr addrspace(1) %out, align 4
497 define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind {
498 ; HSA-VI-LABEL: name: v3f32_arg
499 ; HSA-VI: bb.1.entry:
500 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
501 ; HSA-VI-NEXT: {{ $}}
502 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
503 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
504 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
505 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
506 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
507 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
508 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4)
509 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
510 ; HSA-VI-NEXT: S_ENDPGM 0
511 ; LEGACY-MESA-VI-LABEL: name: v3f32_arg
512 ; LEGACY-MESA-VI: bb.1.entry:
513 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
514 ; LEGACY-MESA-VI-NEXT: {{ $}}
515 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
516 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
517 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
518 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
519 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
520 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
521 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4)
522 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1)
523 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
525 store <3 x float> %in, ptr addrspace(1) %out, align 4
529 define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) {
530 ; HSA-VI-LABEL: name: v4i8_arg
531 ; HSA-VI: bb.1.entry:
532 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
533 ; HSA-VI-NEXT: {{ $}}
534 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
535 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
536 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
537 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
538 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
539 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
540 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4)
541 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
542 ; HSA-VI-NEXT: S_ENDPGM 0
543 ; LEGACY-MESA-VI-LABEL: name: v4i8_arg
544 ; LEGACY-MESA-VI: bb.1.entry:
545 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
546 ; LEGACY-MESA-VI-NEXT: {{ $}}
547 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
548 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
549 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
550 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
551 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
552 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
553 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4)
554 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1)
555 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
557 store <4 x i8> %in, ptr addrspace(1) %out
561 define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) {
562 ; HSA-VI-LABEL: name: v4i16_arg
563 ; HSA-VI: bb.1.entry:
564 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
565 ; HSA-VI-NEXT: {{ $}}
566 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
567 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
568 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
569 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
570 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
571 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
572 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4)
573 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
574 ; HSA-VI-NEXT: S_ENDPGM 0
575 ; LEGACY-MESA-VI-LABEL: name: v4i16_arg
576 ; LEGACY-MESA-VI: bb.1.entry:
577 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
578 ; LEGACY-MESA-VI-NEXT: {{ $}}
579 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
580 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
581 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
582 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
583 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
584 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
585 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4)
586 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1)
587 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
589 store <4 x i16> %in, ptr addrspace(1) %out
593 define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> %in) nounwind {
594 ; HSA-VI-LABEL: name: v4i32_arg
595 ; HSA-VI: bb.1.entry:
596 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
597 ; HSA-VI-NEXT: {{ $}}
598 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
599 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
600 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
601 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
602 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
603 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
604 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
605 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
606 ; HSA-VI-NEXT: S_ENDPGM 0
607 ; LEGACY-MESA-VI-LABEL: name: v4i32_arg
608 ; LEGACY-MESA-VI: bb.1.entry:
609 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
610 ; LEGACY-MESA-VI-NEXT: {{ $}}
611 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
612 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
613 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
614 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
615 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
616 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
617 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
618 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
619 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
621 store <4 x i32> %in, ptr addrspace(1) %out, align 4
625 define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float> %in) nounwind {
626 ; HSA-VI-LABEL: name: v4f32_arg
627 ; HSA-VI: bb.1.entry:
628 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
629 ; HSA-VI-NEXT: {{ $}}
630 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
631 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
632 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
633 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
634 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
635 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
636 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4)
637 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
638 ; HSA-VI-NEXT: S_ENDPGM 0
639 ; LEGACY-MESA-VI-LABEL: name: v4f32_arg
640 ; LEGACY-MESA-VI: bb.1.entry:
641 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
642 ; LEGACY-MESA-VI-NEXT: {{ $}}
643 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
644 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
645 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
646 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
647 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
648 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
649 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4)
650 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1)
651 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
653 store <4 x float> %in, ptr addrspace(1) %out, align 4
657 define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) {
658 ; HSA-VI-LABEL: name: v8i8_arg
659 ; HSA-VI: bb.1.entry:
660 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
661 ; HSA-VI-NEXT: {{ $}}
662 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
663 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
664 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
665 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
666 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
667 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
668 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4)
669 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
670 ; HSA-VI-NEXT: S_ENDPGM 0
671 ; LEGACY-MESA-VI-LABEL: name: v8i8_arg
672 ; LEGACY-MESA-VI: bb.1.entry:
673 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
674 ; LEGACY-MESA-VI-NEXT: {{ $}}
675 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
676 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
677 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
678 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
679 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
680 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
681 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4)
682 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1)
683 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
685 store <8 x i8> %in, ptr addrspace(1) %out
689 define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) {
690 ; HSA-VI-LABEL: name: v8i16_arg
691 ; HSA-VI: bb.1.entry:
692 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
693 ; HSA-VI-NEXT: {{ $}}
694 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
695 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
696 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
697 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
698 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
699 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
700 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4)
701 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
702 ; HSA-VI-NEXT: S_ENDPGM 0
703 ; LEGACY-MESA-VI-LABEL: name: v8i16_arg
704 ; LEGACY-MESA-VI: bb.1.entry:
705 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
706 ; LEGACY-MESA-VI-NEXT: {{ $}}
707 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
708 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
709 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
710 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
711 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
712 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
713 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4)
714 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1)
715 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
717 store <8 x i16> %in, ptr addrspace(1) %out
721 define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind {
722 ; HSA-VI-LABEL: name: v8i32_arg
723 ; HSA-VI: bb.1.entry:
724 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
725 ; HSA-VI-NEXT: {{ $}}
726 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
727 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
728 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
729 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
730 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
731 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
732 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
733 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
734 ; HSA-VI-NEXT: S_ENDPGM 0
735 ; LEGACY-MESA-VI-LABEL: name: v8i32_arg
736 ; LEGACY-MESA-VI: bb.1.entry:
737 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
738 ; LEGACY-MESA-VI-NEXT: {{ $}}
739 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
740 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
741 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
742 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
743 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
744 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
745 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
746 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
747 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
749 store <8 x i32> %in, ptr addrspace(1) %out, align 4
753 define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float> %in) nounwind {
754 ; HSA-VI-LABEL: name: v8f32_arg
755 ; HSA-VI: bb.1.entry:
756 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
757 ; HSA-VI-NEXT: {{ $}}
758 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
759 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
760 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
761 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
762 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
763 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
764 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4)
765 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
766 ; HSA-VI-NEXT: S_ENDPGM 0
767 ; LEGACY-MESA-VI-LABEL: name: v8f32_arg
768 ; LEGACY-MESA-VI: bb.1.entry:
769 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
770 ; LEGACY-MESA-VI-NEXT: {{ $}}
771 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
772 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
773 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
774 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
775 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
776 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
777 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4)
778 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1)
779 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
781 store <8 x float> %in, ptr addrspace(1) %out, align 4
785 define amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) {
786 ; HSA-VI-LABEL: name: v16i8_arg
787 ; HSA-VI: bb.1.entry:
788 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
789 ; HSA-VI-NEXT: {{ $}}
790 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
791 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
792 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
793 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
794 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
795 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
796 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4)
797 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
798 ; HSA-VI-NEXT: S_ENDPGM 0
799 ; LEGACY-MESA-VI-LABEL: name: v16i8_arg
800 ; LEGACY-MESA-VI: bb.1.entry:
801 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
802 ; LEGACY-MESA-VI-NEXT: {{ $}}
803 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
804 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
805 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
806 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
807 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
808 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
809 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4)
810 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1)
811 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
813 store <16 x i8> %in, ptr addrspace(1) %out
817 define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) {
818 ; HSA-VI-LABEL: name: v16i16_arg
819 ; HSA-VI: bb.1.entry:
820 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
821 ; HSA-VI-NEXT: {{ $}}
822 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
823 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
824 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
825 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
826 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
827 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
828 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4)
829 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
830 ; HSA-VI-NEXT: S_ENDPGM 0
831 ; LEGACY-MESA-VI-LABEL: name: v16i16_arg
832 ; LEGACY-MESA-VI: bb.1.entry:
833 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
834 ; LEGACY-MESA-VI-NEXT: {{ $}}
835 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
836 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
837 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
838 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
839 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
840 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
841 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4)
842 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1)
843 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
845 store <16 x i16> %in, ptr addrspace(1) %out
849 define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32> %in) nounwind {
850 ; HSA-VI-LABEL: name: v16i32_arg
851 ; HSA-VI: bb.1.entry:
852 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
853 ; HSA-VI-NEXT: {{ $}}
854 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
855 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
856 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
857 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
858 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
859 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
860 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
861 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
862 ; HSA-VI-NEXT: S_ENDPGM 0
863 ; LEGACY-MESA-VI-LABEL: name: v16i32_arg
864 ; LEGACY-MESA-VI: bb.1.entry:
865 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
866 ; LEGACY-MESA-VI-NEXT: {{ $}}
867 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
868 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
869 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
870 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
871 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
872 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
873 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
874 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
875 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
877 store <16 x i32> %in, ptr addrspace(1) %out, align 4
881 define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x float> %in) nounwind {
882 ; HSA-VI-LABEL: name: v16f32_arg
883 ; HSA-VI: bb.1.entry:
884 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
885 ; HSA-VI-NEXT: {{ $}}
886 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
887 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
888 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
889 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
890 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
891 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
892 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4)
893 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
894 ; HSA-VI-NEXT: S_ENDPGM 0
895 ; LEGACY-MESA-VI-LABEL: name: v16f32_arg
896 ; LEGACY-MESA-VI: bb.1.entry:
897 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
898 ; LEGACY-MESA-VI-NEXT: {{ $}}
899 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
900 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
901 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
902 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
903 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
904 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
905 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4)
906 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1)
907 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
909 store <16 x float> %in, ptr addrspace(1) %out, align 4
913 define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind {
914 ; HSA-VI-LABEL: name: kernel_arg_i64
915 ; HSA-VI: bb.1 (%ir-block.0):
916 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
917 ; HSA-VI-NEXT: {{ $}}
918 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
919 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
920 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
921 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
922 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
923 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
924 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
925 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
926 ; HSA-VI-NEXT: S_ENDPGM 0
927 ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64
928 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
929 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
930 ; LEGACY-MESA-VI-NEXT: {{ $}}
931 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
932 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
933 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
934 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
935 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
936 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
937 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
938 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
939 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
940 store i64 %a, ptr addrspace(1) %out, align 8
944 define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) {
945 ; HSA-VI-LABEL: name: f64_kernel_arg
946 ; HSA-VI: bb.1.entry:
947 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
948 ; HSA-VI-NEXT: {{ $}}
949 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
950 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
951 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
952 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
953 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
954 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
955 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
956 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
957 ; HSA-VI-NEXT: S_ENDPGM 0
958 ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg
959 ; LEGACY-MESA-VI: bb.1.entry:
960 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
961 ; LEGACY-MESA-VI-NEXT: {{ $}}
962 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
963 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
964 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
965 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
966 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
967 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
968 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
969 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
970 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
972 store double %in, ptr addrspace(1) %out
976 define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind {
977 ; HSA-VI-LABEL: name: i1_arg
978 ; HSA-VI: bb.1 (%ir-block.0):
979 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
980 ; HSA-VI-NEXT: {{ $}}
981 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
982 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
983 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
984 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
985 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
986 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
987 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
988 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
989 ; HSA-VI-NEXT: S_ENDPGM 0
990 ; LEGACY-MESA-VI-LABEL: name: i1_arg
991 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
992 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
993 ; LEGACY-MESA-VI-NEXT: {{ $}}
994 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
995 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
996 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
997 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
998 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
999 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1000 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1001 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1)
1002 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1003 store i1 %x, ptr addrspace(1) %out, align 1
1007 define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwind {
1008 ; HSA-VI-LABEL: name: i1_arg_zext_i32
1009 ; HSA-VI: bb.1 (%ir-block.0):
1010 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1011 ; HSA-VI-NEXT: {{ $}}
1012 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1013 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1014 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1015 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1016 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1017 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1018 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1019 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
1020 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1021 ; HSA-VI-NEXT: S_ENDPGM 0
1022 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32
1023 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1024 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1025 ; LEGACY-MESA-VI-NEXT: {{ $}}
1026 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1027 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1028 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1029 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1030 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1031 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1032 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1033 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1)
1034 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1035 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1036 %ext = zext i1 %x to i32
1037 store i32 %ext, ptr addrspace(1) %out, align 4
1041 define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwind {
1042 ; HSA-VI-LABEL: name: i1_arg_zext_i64
1043 ; HSA-VI: bb.1 (%ir-block.0):
1044 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1045 ; HSA-VI-NEXT: {{ $}}
1046 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1047 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1048 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1049 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1050 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1051 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1052 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1053 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
1054 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1055 ; HSA-VI-NEXT: S_ENDPGM 0
1056 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64
1057 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1058 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1059 ; LEGACY-MESA-VI-NEXT: {{ $}}
1060 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1061 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1062 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1063 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1064 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1065 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1066 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1067 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1)
1068 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1069 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1070 %ext = zext i1 %x to i64
1071 store i64 %ext, ptr addrspace(1) %out, align 8
1075 define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwind {
1076 ; HSA-VI-LABEL: name: i1_arg_sext_i32
1077 ; HSA-VI: bb.1 (%ir-block.0):
1078 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1079 ; HSA-VI-NEXT: {{ $}}
1080 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1081 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1082 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1083 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1084 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1085 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1086 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1087 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1088 ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1089 ; HSA-VI-NEXT: S_ENDPGM 0
1090 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32
1091 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1092 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1093 ; LEGACY-MESA-VI-NEXT: {{ $}}
1094 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1095 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1096 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1097 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1098 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1099 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1100 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1101 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1)
1102 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1103 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1104 %ext = sext i1 %x to i32
1105 store i32 %ext, ptr addrspace(1) %out, align 4
1109 define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwind {
1110 ; HSA-VI-LABEL: name: i1_arg_sext_i64
1111 ; HSA-VI: bb.1 (%ir-block.0):
1112 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1113 ; HSA-VI-NEXT: {{ $}}
1114 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1115 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1116 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1117 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1118 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1119 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1120 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4)
1121 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1122 ; HSA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1123 ; HSA-VI-NEXT: S_ENDPGM 0
1124 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64
1125 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1126 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1127 ; LEGACY-MESA-VI-NEXT: {{ $}}
1128 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1129 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1130 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1131 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1132 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1133 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1134 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4)
1135 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1)
1136 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1)
1137 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1138 %ext = sext i1 %x to i64
1139 store i64 %ext, ptr addrspace(1) %out, align 8
1143 ; 0-sized arguments do not add a slot to the argument register set, so
1144 ; waste an index in the virtual register array.
1145 define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind {
1146 ; HSA-VI-LABEL: name: empty_struct_arg
1147 ; HSA-VI: bb.1 (%ir-block.0):
1148 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1149 ; HSA-VI-NEXT: {{ $}}
1150 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1151 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1152 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1153 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1154 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1155 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1156 ; HSA-VI-NEXT: S_ENDPGM 0
1157 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg
1158 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1159 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1160 ; LEGACY-MESA-VI-NEXT: {{ $}}
1161 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1162 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1163 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1164 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1165 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1166 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1167 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1168 store i32 %arg1, ptr addrspace(1) undef
1172 define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind {
1173 ; HSA-VI-LABEL: name: empty_array_arg
1174 ; HSA-VI: bb.1 (%ir-block.0):
1175 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1176 ; HSA-VI-NEXT: {{ $}}
1177 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1178 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1179 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1180 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1181 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1182 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1183 ; HSA-VI-NEXT: S_ENDPGM 0
1184 ; LEGACY-MESA-VI-LABEL: name: empty_array_arg
1185 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1186 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1187 ; LEGACY-MESA-VI-NEXT: {{ $}}
1188 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1189 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1190 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1191 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1192 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1193 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1194 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1195 store i32 %arg1, ptr addrspace(1) undef
1199 ; The correct load offsets for these:
1205 ; With the SelectionDAG argument lowering, the alignments for the
1206 ; struct members is not properly considered, making these wrong.
1207 define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) {
1208 ; HSA-VI-LABEL: name: struct_argument_alignment
1209 ; HSA-VI: bb.1 (%ir-block.0):
1210 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1211 ; HSA-VI-NEXT: {{ $}}
1212 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1213 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1214 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1215 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1216 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1217 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1218 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1219 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1220 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1221 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
1222 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1223 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1224 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1225 ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1226 ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1227 ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1228 ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1229 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1230 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1231 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
1232 ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1233 ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1234 ; HSA-VI-NEXT: S_ENDPGM 0
1235 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
1236 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1237 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1238 ; LEGACY-MESA-VI-NEXT: {{ $}}
1239 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1240 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1241 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1242 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1243 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1244 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1245 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1246 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1247 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1248 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
1249 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1250 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1251 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1252 ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1253 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1254 ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1255 ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1256 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1257 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1258 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
1259 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1260 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1261 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1262 %val0 = extractvalue {i32, i64} %arg0, 0
1263 %val1 = extractvalue {i32, i64} %arg0, 1
1264 %val2 = extractvalue {i32, i64} %arg1, 0
1265 %val3 = extractvalue {i32, i64} %arg1, 1
1266 store volatile i32 %val0, ptr addrspace(1) null
1267 store volatile i64 %val1, ptr addrspace(1) null
1268 store volatile i8 %pad, ptr addrspace(1) null
1269 store volatile i32 %val2, ptr addrspace(1) null
1270 store volatile i64 %val3, ptr addrspace(1) null
1274 define amdgpu_kernel void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %arg0, i8 %pad, {ptr addrspace(3), ptr addrspace(1234)} %arg1) {
1275 ; HSA-VI-LABEL: name: pointer_in_struct_argument
1276 ; HSA-VI: bb.1 (%ir-block.0):
1277 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1278 ; HSA-VI-NEXT: {{ $}}
1279 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1280 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1281 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1282 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1283 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1284 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1285 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1286 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1287 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1288 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4)
1289 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
1290 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1291 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1292 ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1293 ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1294 ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4)
1295 ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1296 ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1)
1297 ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1)
1298 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
1299 ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1)
1300 ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1)
1301 ; HSA-VI-NEXT: S_ENDPGM 0
1302 ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument
1303 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1304 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1305 ; LEGACY-MESA-VI-NEXT: {{ $}}
1306 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1307 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1308 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1309 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1310 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1311 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1312 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1313 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1314 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1315 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4)
1316 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
1317 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1318 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1319 ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1320 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64)
1321 ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1322 ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1323 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1)
1324 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1)
1325 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1)
1326 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1)
1327 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1)
1328 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1329 %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0
1330 %val1 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 1
1331 %val2 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 0
1332 %val3 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 1
1333 store volatile ptr addrspace(3) %val0, ptr addrspace(1) null
1334 store volatile ptr addrspace(1) %val1, ptr addrspace(1) null
1335 store volatile i8 %pad, ptr addrspace(1) null
1336 store volatile ptr addrspace(3) %val2, ptr addrspace(1) null
1337 store volatile ptr addrspace(1234) %val3, ptr addrspace(1) null
1341 ; No padding between i8 and next struct, but round up at end to 4 byte
1343 define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) {
1344 ; HSA-VI-LABEL: name: packed_struct_argument_alignment
1345 ; HSA-VI: bb.1 (%ir-block.1):
1346 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1347 ; HSA-VI-NEXT: {{ $}}
1348 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1349 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1350 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1351 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1352 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
1353 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1354 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1355 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
1356 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1357 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
1358 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17
1359 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1360 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
1361 ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1362 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1363 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1364 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1365 ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1366 ; HSA-VI-NEXT: S_ENDPGM 0
1367 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
1368 ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1369 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1370 ; LEGACY-MESA-VI-NEXT: {{ $}}
1371 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1372 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1373 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1374 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1375 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
1376 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1377 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4)
1378 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
1379 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1380 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4)
1381 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53
1382 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1383 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4)
1384 ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
1385 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1386 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1387 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1)
1388 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1)
1389 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1390 %val0 = extractvalue <{i32, i64}> %arg0, 0
1391 %val1 = extractvalue <{i32, i64}> %arg0, 1
1392 %val2 = extractvalue <{i32, i64}> %arg1, 0
1393 %val3 = extractvalue <{i32, i64}> %arg1, 1
1394 store volatile i32 %val0, ptr addrspace(1) null
1395 store volatile i64 %val1, ptr addrspace(1) null
1396 store volatile i32 %val2, ptr addrspace(1) null
1397 store volatile i64 %val3, ptr addrspace(1) null
1401 define amdgpu_kernel void @unused_i32_arg(ptr addrspace(1) nocapture %out, i32 %unused, i32 %in) nounwind {
1402 ; HSA-VI-LABEL: name: unused_i32_arg
1403 ; HSA-VI: bb.1.entry:
1404 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1405 ; HSA-VI-NEXT: {{ $}}
1406 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1407 ; HSA-VI-NEXT: S_ENDPGM 0
1408 ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg
1409 ; LEGACY-MESA-VI: bb.1.entry:
1410 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1411 ; LEGACY-MESA-VI-NEXT: {{ $}}
1412 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1413 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1418 ; Byref pointers should only be treated as offsets from kernarg
1419 define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) {
1420 ; HSA-VI-LABEL: name: byref_constant_i8_arg
1421 ; HSA-VI: bb.1 (%ir-block.0):
1422 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1423 ; HSA-VI-NEXT: {{ $}}
1424 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1425 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1426 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1427 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1428 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1429 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1430 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4)
1431 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1432 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1433 ; HSA-VI-NEXT: S_ENDPGM 0
1434 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg
1435 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1436 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1437 ; LEGACY-MESA-VI-NEXT: {{ $}}
1438 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1439 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1440 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1441 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1442 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1443 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1444 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4)
1445 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
1446 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1447 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1448 %in = load i8, ptr addrspace(4) %in.byref
1449 %ext = zext i8 %in to i32
1450 store i32 %ext, ptr addrspace(1) %out, align 4
1454 define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) align 2 %in.byref) {
1455 ; HSA-VI-LABEL: name: byref_constant_i16_arg
1456 ; HSA-VI: bb.1 (%ir-block.0):
1457 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1458 ; HSA-VI-NEXT: {{ $}}
1459 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1460 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1461 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1462 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1463 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1464 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1465 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4)
1466 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1467 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1468 ; HSA-VI-NEXT: S_ENDPGM 0
1469 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg
1470 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1471 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1472 ; LEGACY-MESA-VI-NEXT: {{ $}}
1473 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1474 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1475 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1476 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1477 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1478 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1479 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4)
1480 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16)
1481 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1482 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1483 %in = load i16, ptr addrspace(4) %in.byref
1484 %ext = zext i16 %in to i32
1485 store i32 %ext, ptr addrspace(1) %out, align 4
1489 define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align 4 %in.byref, i32 %after.offset) {
1490 ; HSA-VI-LABEL: name: byref_constant_i32_arg
1491 ; HSA-VI: bb.1 (%ir-block.0):
1492 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1493 ; HSA-VI-NEXT: {{ $}}
1494 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1495 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1496 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1497 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1498 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1499 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1500 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1501 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1502 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1503 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
1504 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1505 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1506 ; HSA-VI-NEXT: S_ENDPGM 0
1507 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg
1508 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1509 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1510 ; LEGACY-MESA-VI-NEXT: {{ $}}
1511 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1512 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1513 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1514 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1515 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1516 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1517 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1518 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1519 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1520 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
1521 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1522 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1523 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1524 %in = load i32, ptr addrspace(4) %in.byref
1525 store volatile i32 %in, ptr addrspace(1) %out, align 4
1526 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1530 define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) {
1531 ; HSA-VI-LABEL: name: byref_constant_v4i32_arg
1532 ; HSA-VI: bb.1 (%ir-block.0):
1533 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1534 ; HSA-VI-NEXT: {{ $}}
1535 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1536 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1537 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1538 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1539 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1540 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1541 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
1542 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1543 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1544 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4)
1545 ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
1546 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1547 ; HSA-VI-NEXT: S_ENDPGM 0
1548 ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg
1549 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1550 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1551 ; LEGACY-MESA-VI-NEXT: {{ $}}
1552 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1553 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1554 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1555 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1556 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1557 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1558 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
1559 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1560 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1561 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4)
1562 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1)
1563 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1564 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1565 %in = load <4 x i32>, ptr addrspace(4) %in.byref
1566 store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4
1567 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1571 define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) {
1572 ; HSA-VI-LABEL: name: byref_align_constant_i32_arg
1573 ; HSA-VI: bb.1 (%ir-block.0):
1574 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1575 ; HSA-VI-NEXT: {{ $}}
1576 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1577 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1578 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1579 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1580 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256
1581 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1582 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260
1583 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1584 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1585 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
1586 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1587 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1588 ; HSA-VI-NEXT: S_ENDPGM 0
1589 ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg
1590 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1591 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1592 ; LEGACY-MESA-VI-NEXT: {{ $}}
1593 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1594 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1595 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1596 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1597 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292
1598 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1599 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296
1600 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1601 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4)
1602 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
1603 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1604 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1605 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1606 %in = load i32, ptr addrspace(4) %in.byref
1607 store volatile i32 %in, ptr addrspace(1) %out, align 4
1608 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1612 define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) {
1613 ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1614 ; HSA-VI: bb.1 (%ir-block.1):
1615 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1616 ; HSA-VI-NEXT: {{ $}}
1617 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1618 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1619 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1620 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1621 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
1622 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1623 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
1624 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1625 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1626 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4)
1627 ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1)
1628 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1629 ; HSA-VI-NEXT: S_ENDPGM 0
1630 ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg
1631 ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
1632 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1633 ; LEGACY-MESA-VI-NEXT: {{ $}}
1634 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1635 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1636 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1637 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1638 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100
1639 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1640 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164
1641 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1642 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1643 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4)
1644 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1)
1645 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1646 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1647 %in = load <16 x i32>, ptr addrspace(4) %in.byref
1648 store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4
1649 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1653 ; Also accept byref kernel arguments with other global address spaces.
1654 define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) align(4) %in.byref) {
1655 ; HSA-VI-LABEL: name: byref_global_i32_arg
1656 ; HSA-VI: bb.1 (%ir-block.0):
1657 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1658 ; HSA-VI-NEXT: {{ $}}
1659 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1660 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1661 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1662 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1663 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1664 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1665 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1666 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1)
1667 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1668 ; HSA-VI-NEXT: S_ENDPGM 0
1669 ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg
1670 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1671 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1672 ; LEGACY-MESA-VI-NEXT: {{ $}}
1673 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1674 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1675 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1676 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1677 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1678 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1679 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1680 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1)
1681 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1682 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1683 %in = load i32, ptr addrspace(1) %in.byref
1684 store i32 %in, ptr addrspace(1) %out, align 4
1688 define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) align(4) %in.byref) {
1689 ; HSA-VI-LABEL: name: byref_flat_i32_arg
1690 ; HSA-VI: bb.1 (%ir-block.0):
1691 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1692 ; HSA-VI-NEXT: {{ $}}
1693 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1694 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1695 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1696 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1697 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1698 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1699 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1700 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
1701 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1702 ; HSA-VI-NEXT: S_ENDPGM 0
1703 ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg
1704 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1705 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1706 ; LEGACY-MESA-VI-NEXT: {{ $}}
1707 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1708 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1709 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1710 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1711 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1712 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1713 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1714 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref)
1715 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1716 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1717 %in = load i32, ptr %in.byref
1718 store i32 %in, ptr addrspace(1) %out, align 4
1722 define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) align(4) %in.byref) {
1723 ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg
1724 ; HSA-VI: bb.1 (%ir-block.0):
1725 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1726 ; HSA-VI-NEXT: {{ $}}
1727 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1728 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1729 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1730 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1731 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1732 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1733 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1734 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6)
1735 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1736 ; HSA-VI-NEXT: S_ENDPGM 0
1737 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg
1738 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1739 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1740 ; LEGACY-MESA-VI-NEXT: {{ $}}
1741 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1742 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1743 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1744 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1745 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1746 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1747 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1748 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6)
1749 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1750 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1751 %in = load i32, ptr addrspace(6) %in.byref
1752 store i32 %in, ptr addrspace(1) %out, align 4
1756 define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) align(4) %in.byref) {
1757 ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg
1758 ; HSA-VI: bb.1 (%ir-block.0):
1759 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1760 ; HSA-VI-NEXT: {{ $}}
1761 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1762 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1763 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1764 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1765 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1766 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1767 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1768 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
1769 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1770 ; HSA-VI-NEXT: S_ENDPGM 0
1771 ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg
1772 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1773 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1774 ; LEGACY-MESA-VI-NEXT: {{ $}}
1775 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1776 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1777 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1778 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1779 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1780 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1781 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1782 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999)
1783 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1784 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1785 %in = load i32, ptr addrspace(999) %in.byref
1786 store i32 %in, ptr addrspace(1) %out, align 4
1790 ; Invalid, but should not crash.
1791 define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) align(4) %in.byref) {
1792 ; HSA-VI-LABEL: name: byref_local_i32_arg
1793 ; HSA-VI: bb.1 (%ir-block.0):
1794 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1795 ; HSA-VI-NEXT: {{ $}}
1796 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1797 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1798 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1799 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1800 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1801 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1802 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1803 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
1804 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1805 ; HSA-VI-NEXT: S_ENDPGM 0
1806 ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg
1807 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1808 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1809 ; LEGACY-MESA-VI-NEXT: {{ $}}
1810 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1811 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1812 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1813 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1814 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1815 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1816 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4)
1817 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3)
1818 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1)
1819 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1820 %in = load i32, ptr addrspace(3) %in.byref
1821 store i32 %in, ptr addrspace(1) %out, align 4
1825 define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(4) %in0.byref, ptr addrspace(4) byref(i32) align(4) %in1.byref, i32 %after.offset) {
1826 ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg
1827 ; HSA-VI: bb.1 (%ir-block.0):
1828 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1829 ; HSA-VI-NEXT: {{ $}}
1830 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1831 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1832 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1833 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4)
1834 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
1835 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1836 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
1837 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1838 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
1839 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1840 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4)
1841 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4)
1842 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4)
1843 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1844 ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1845 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1846 ; HSA-VI-NEXT: S_ENDPGM 0
1847 ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg
1848 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1849 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1850 ; LEGACY-MESA-VI-NEXT: {{ $}}
1851 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1852 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1853 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1854 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4)
1855 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
1856 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
1857 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
1858 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
1859 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
1860 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64)
1861 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4)
1862 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4)
1863 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4)
1864 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1865 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1866 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1)
1867 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1868 %in0 = load i32, ptr addrspace(4) %in0.byref
1869 %in1 = load i32, ptr addrspace(4) %in1.byref
1870 store volatile i32 %in0, ptr addrspace(1) %out, align 4
1871 store volatile i32 %in1, ptr addrspace(1) %out, align 4
1872 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
1876 define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) align(4) %in.byref) {
1877 ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0
1878 ; HSA-VI: bb.1 (%ir-block.0):
1879 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1880 ; HSA-VI-NEXT: {{ $}}
1881 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1882 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1883 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1884 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1885 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
1886 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1887 ; HSA-VI-NEXT: S_ENDPGM 0
1888 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0
1889 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1890 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1891 ; LEGACY-MESA-VI-NEXT: {{ $}}
1892 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1893 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1894 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1895 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1896 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4)
1897 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1)
1898 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1899 %in = load i32, ptr addrspace(4) %in.byref
1900 store i32 %in, ptr addrspace(1) undef, align 4
1904 define amdgpu_kernel void @p3i8_arg(ptr addrspace(3) %arg) nounwind {
1905 ; HSA-VI-LABEL: name: p3i8_arg
1906 ; HSA-VI: bb.1 (%ir-block.0):
1907 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1908 ; HSA-VI-NEXT: {{ $}}
1909 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1910 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1911 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1912 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4)
1913 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1914 ; HSA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
1915 ; HSA-VI-NEXT: S_ENDPGM 0
1916 ; LEGACY-MESA-VI-LABEL: name: p3i8_arg
1917 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1918 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1919 ; LEGACY-MESA-VI-NEXT: {{ $}}
1920 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1921 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1922 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1923 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4)
1924 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1925 ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3)
1926 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1927 store i8 9, ptr addrspace(3) %arg, align 4
1931 define amdgpu_kernel void @p1i8_arg(ptr addrspace(1) %arg) nounwind {
1932 ; HSA-VI-LABEL: name: p1i8_arg
1933 ; HSA-VI: bb.1 (%ir-block.0):
1934 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1935 ; HSA-VI-NEXT: {{ $}}
1936 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1937 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1938 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
1939 ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3)
1940 ; HSA-VI-NEXT: S_ENDPGM 0
1941 ; LEGACY-MESA-VI-LABEL: name: p1i8_arg
1942 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1943 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1944 ; LEGACY-MESA-VI-NEXT: {{ $}}
1945 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1946 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9
1947 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0
1948 ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3)
1949 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1950 store i8 9, ptr addrspace(3) null
1954 define amdgpu_kernel void @v2p1i8_arg(<2 x ptr addrspace(1)> %arg) nounwind {
1955 ; HSA-VI-LABEL: name: v2p1i8_arg
1956 ; HSA-VI: bb.1 (%ir-block.0):
1957 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1958 ; HSA-VI-NEXT: {{ $}}
1959 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1960 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1961 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1962 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4)
1963 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1964 ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1)
1965 ; HSA-VI-NEXT: S_ENDPGM 0
1966 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg
1967 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1968 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1969 ; LEGACY-MESA-VI-NEXT: {{ $}}
1970 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1971 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1972 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1973 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4)
1974 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1975 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1)
1976 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
1977 store <2 x ptr addrspace(1)> %arg, ptr addrspace(1) undef
1981 define amdgpu_kernel void @v2p3i8_arg(<2 x ptr addrspace(3)> %arg) nounwind {
1982 ; HSA-VI-LABEL: name: v2p3i8_arg
1983 ; HSA-VI: bb.1 (%ir-block.0):
1984 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
1985 ; HSA-VI-NEXT: {{ $}}
1986 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
1987 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
1988 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
1989 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4)
1990 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
1991 ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1)
1992 ; HSA-VI-NEXT: S_ENDPGM 0
1993 ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg
1994 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
1995 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
1996 ; LEGACY-MESA-VI-NEXT: {{ $}}
1997 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
1998 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
1999 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2000 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4)
2001 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2002 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1)
2003 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
2004 store <2 x ptr addrspace(3)> %arg, ptr addrspace(1) undef
2008 define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg) nounwind {
2009 ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg
2010 ; HSA-VI: bb.1 (%ir-block.0):
2011 ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5
2012 ; HSA-VI-NEXT: {{ $}}
2013 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
2014 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
2015 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2016 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4)
2017 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2018 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
2019 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4)
2020 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2021 ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1)
2022 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2023 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
2024 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1)
2025 ; HSA-VI-NEXT: S_ENDPGM 0
2026 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg
2027 ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
2028 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr0_sgpr1
2029 ; LEGACY-MESA-VI-NEXT: {{ $}}
2030 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
2031 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
2032 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64)
2033 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
2034 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
2035 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
2036 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4)
2037 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
2038 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1)
2039 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2040 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64)
2041 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1)
2042 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0
2043 store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef