; Fixed-length SVE sign/zero extension lowering test. llc is invoked once per
; minimum SVE vector width from 128 to 2048 bits in 128-bit steps. The 128-bit
; invocation uses its own prefix; the 256-bit invocation adds an exact-width
; prefix; invocations at 512, 1024 and 2048 bits (and everything above each
; threshold) accumulate the corresponding "at least N bits" prefixes, so a
; function is fully verified only by the invocations whose prefixes it uses.
1 ; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
2 ; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
3 ; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
4 ; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5 ; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
6 ; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
7 ; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
8 ; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
9 ; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
10 ; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
11 ; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
12 ; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
13 ; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
14 ; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
15 ; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
16 ; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
; No -mtriple is passed on the command lines above; the target comes from here.
18 target triple = "aarch64-unknown-linux-gnu"
20 ; Don't use SVE when its registers are no bigger than NEON.
27 ; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
28 ; type's element type is not byte based and thus cannot be lowered directly to
; an SVE instruction.
; Sign-extend a by-value <8 x i1> argument to <8 x i32> and store it to %out.
; Expected lowering: two unsigned unpacks up to words, an lsl/asr pair by 31
; bits (32 - 1) to replicate the single source bit, then a predicated st1w (vl8).
30 define void @sext_v8i1_v8i32(<8 x i1> %a, <8 x i32>* %out) #0 {
31 ; CHECK-LABEL: sext_v8i1_v8i32:
32 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
33 ; CHECK-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
34 ; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
35 ; CHECK-NEXT: lsl [[A_WORDS]].s, [[PG]]/m, [[A_WORDS]].s, #31
36 ; CHECK-NEXT: asr [[A_WORDS]].s, [[PG]]/m, [[A_WORDS]].s, #31
37 ; CHECK-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
39 %b = sext <8 x i1> %a to <8 x i32>
40 store <8 x i32> %b, <8 x i32>* %out
48 ; NOTE: Covers the scenario where a SIGN_EXTEND_INREG is required, whose inreg
49 ; type's element type is not power-of-2 based and thus cannot be lowered
50 ; directly to an SVE instruction.
; Sign-extend a by-value <4 x i3> argument to <4 x i64> and store it to %out.
; Expected lowering: two unsigned unpacks up to doublewords, then an lsl/asr
; pair by 61 bits (64 - 3) to replicate the sign bit of the 3-bit source.
; The store is matched against the doubleword register that the second unpack
; defines and the shifts update in place, not the intermediate word register,
; so the test does not depend on both captures landing in the same register.
51 define void @sext_v4i3_v4i64(<4 x i3> %a, <4 x i64>* %out) #0 {
52 ; CHECK-LABEL: sext_v4i3_v4i64:
53 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
54 ; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, z0.h
55 ; CHECK-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
56 ; CHECK-NEXT: lsl [[A_DWORDS]].d, [[PG]]/m, [[A_DWORDS]].d, #61
57 ; CHECK-NEXT: asr [[A_DWORDS]].d, [[PG]]/m, [[A_DWORDS]].d, #61
58 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
60 %b = sext <4 x i3> %a to <4 x i64>
61 store <4 x i64> %b, <4 x i64>* %out
; Sign-extend a by-value <16 x i8> argument to <16 x i16> and store it to %out.
; Expected lowering: a single sunpklo to halfwords and a predicated st1h (vl16).
69 define void @sext_v16i8_v16i16(<16 x i8> %a, <16 x i16>* %out) #0 {
70 ; CHECK-LABEL: sext_v16i8_v16i16:
71 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
72 ; CHECK-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, z0.b
73 ; CHECK-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x0]
75 %b = sext <16 x i8> %a to <16 x i16>
76 store <16 x i16>%b, <16 x i16>* %out
80 ; NOTE: Extra 'add' is to prevent the extend being combined with the load.
; Load <32 x i8> from %in, double it with an add, sign-extend to <32 x i16>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 512 bits or more.
81 define void @sext_v32i8_v32i16(<32 x i8>* %in, <32 x i16>* %out) #0 {
82 ; CHECK-LABEL: sext_v32i8_v32i16:
83 ; VBITS_GE_512: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
84 ; VBITS_GE_512-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
85 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].h, vl32
86 ; VBITS_GE_512-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
87 ; VBITS_GE_512-NEXT: ret
88 %a = load <32 x i8>, <32 x i8>* %in
89 %b = add <32 x i8> %a, %a
90 %c = sext <32 x i8> %b to <32 x i16>
91 store <32 x i16> %c, <32 x i16>* %out
; Load <64 x i8> from %in, double it with an add, sign-extend to <64 x i16>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
95 define void @sext_v64i8_v64i16(<64 x i8>* %in, <64 x i16>* %out) #0 {
96 ; CHECK-LABEL: sext_v64i8_v64i16:
97 ; VBITS_GE_1024: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
98 ; VBITS_GE_1024-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
99 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].h, vl64
100 ; VBITS_GE_1024-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
101 ; VBITS_GE_1024-NEXT: ret
102 %a = load <64 x i8>, <64 x i8>* %in
103 %b = add <64 x i8> %a, %a
104 %c = sext <64 x i8> %b to <64 x i16>
105 store <64 x i16> %c, <64 x i16>* %out
; Load <128 x i8> from %in, double it with an add, sign-extend to <128 x i16>
; and store to %out. The instruction sequence is verified only by the
; 2048-bit invocation.
109 define void @sext_v128i8_v128i16(<128 x i8>* %in, <128 x i16>* %out) #0 {
110 ; CHECK-LABEL: sext_v128i8_v128i16:
111 ; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
112 ; VBITS_GE_2048-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
113 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].h, vl128
114 ; VBITS_GE_2048-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
115 ; VBITS_GE_2048-NEXT: ret
116 %a = load <128 x i8>, <128 x i8>* %in
117 %b = add <128 x i8> %a, %a
118 %c = sext <128 x i8> %b to <128 x i16>
119 store <128 x i16> %c, <128 x i16>* %out
; Sign-extend a by-value <8 x i8> argument to <8 x i32> and store it to %out.
; Expected lowering: sunpklo to halfwords, sunpklo to words, then a predicated
; st1w (vl8). The store is matched against the word register defined by the
; second unpack rather than the intermediate halfword register, so the test
; does not depend on both captures landing in the same register.
127 define void @sext_v8i8_v8i32(<8 x i8> %a, <8 x i32>* %out) #0 {
128 ; CHECK-LABEL: sext_v8i8_v8i32:
129 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
130 ; CHECK-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, z0.b
131 ; CHECK-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
132 ; CHECK-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
134 %b = sext <8 x i8> %a to <8 x i32>
135 store <8 x i32>%b, <8 x i32>* %out
; Sign-extend a by-value <16 x i8> argument to <16 x i32> and store it to %out.
; At 512 bits or more a single unpack chain and one st1w (vl16) is expected.
; At exactly 256 bits the result does not fit one register: the high eight
; bytes are extracted with ext, each half is unpacked separately, and two
; st1w (vl8) stores are expected, the second at an element offset of 8.
139 define void @sext_v16i8_v16i32(<16 x i8> %a, <16 x i32>* %out) #0 {
140 ; CHECK-LABEL: sext_v16i8_v16i32:
141 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
142 ; VBITS_GE_512-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, z0.b
143 ; VBITS_GE_512-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
144 ; VBITS_GE_512-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
145 ; VBITS_GE_512-NEXT: ret
147 ; Ensure sensible type legalisation.
148 ; VBITS_EQ_256-DAG: ext v[[A_HI:[0-9]+]].16b, v0.16b, v0.16b, #8
149 ; VBITS_EQ_256-DAG: sunpklo [[A_HALFS_LO:z[0-9]+]].h, z0.b
150 ; VBITS_EQ_256-DAG: sunpklo [[A_HALFS_HI:z[0-9]+]].h, z[[A_HI]].b
151 ; VBITS_EQ_256-DAG: sunpklo [[A_WORDS_LO:z[0-9]+]].s, [[A_HALFS_LO]].h
152 ; VBITS_EQ_256-DAG: sunpklo [[A_WORDS_HI:z[0-9]+]].s, [[A_HALFS_HI]].h
153 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
154 ; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
155 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_LO]].s }, [[PG]], [x0]
156 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
157 ; VBITS_EQ_256-NEXT: ret
158 %b = sext <16 x i8> %a to <16 x i32>
159 store <16 x i32> %b, <16 x i32>* %out
; Load <32 x i8> from %in, double it with an add, sign-extend to <32 x i32>
; and store to %out. The two-step unpack sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
163 define void @sext_v32i8_v32i32(<32 x i8>* %in, <32 x i32>* %out) #0 {
164 ; CHECK-LABEL: sext_v32i8_v32i32:
165 ; VBITS_GE_1024: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
166 ; VBITS_GE_1024-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
167 ; VBITS_GE_1024-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
168 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].s, vl32
169 ; VBITS_GE_1024-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
170 ; VBITS_GE_1024-NEXT: ret
171 %a = load <32 x i8>, <32 x i8>* %in
172 %b = add <32 x i8> %a, %a
173 %c = sext <32 x i8> %b to <32 x i32>
174 store <32 x i32> %c, <32 x i32>* %out
; Load <64 x i8> from %in, double it with an add, sign-extend to <64 x i32>
; and store to %out. The two-step unpack sequence is verified only by the
; 2048-bit invocation.
178 define void @sext_v64i8_v64i32(<64 x i8>* %in, <64 x i32>* %out) #0 {
179 ; CHECK-LABEL: sext_v64i8_v64i32:
180 ; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
181 ; VBITS_GE_2048-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
182 ; VBITS_GE_2048-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
183 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].s, vl64
184 ; VBITS_GE_2048-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
185 ; VBITS_GE_2048-NEXT: ret
186 %a = load <64 x i8>, <64 x i8>* %in
187 %b = add <64 x i8> %a, %a
188 %c = sext <64 x i8> %b to <64 x i32>
189 store <64 x i32> %c, <64 x i32>* %out
197 ; NOTE: v4i8 is an unpacked type stored within a v4i16 container. The sign
198 ; extend is a two step process where the container is any_extend'd with the
199 ; result feeding an inreg sign extend.
; Sign-extend a by-value <4 x i8> argument to <4 x i64> and store it to %out.
; Expected lowering: two unsigned unpacks from halfword lanes up to
; doublewords, a predicated sxtb to sign-extend the low byte of each lane,
; then a predicated st1d (vl4).
200 define void @sext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) #0 {
201 ; CHECK-LABEL: sext_v4i8_v4i64:
202 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
203 ; CHECK-NEXT: uunpklo [[ANYEXT_W:z[0-9]+]].s, z0.h
204 ; CHECK-NEXT: uunpklo [[ANYEXT_D:z[0-9]+]].d, [[ANYEXT_W]].s
205 ; CHECK-NEXT: sxtb [[A_DWORDS:z[0-9]+]].d, [[PG]]/m, [[ANYEXT_D]].d
206 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
208 %b = sext <4 x i8> %a to <4 x i64>
209 store <4 x i64>%b, <4 x i64>* %out
; Sign-extend a by-value <8 x i8> argument to <8 x i64> and store it to %out.
; The three-step sunpklo chain and st1d (vl8) are verified only by the
; invocations with a minimum vector width of 512 bits or more.
213 define void @sext_v8i8_v8i64(<8 x i8> %a, <8 x i64>* %out) #0 {
214 ; CHECK-LABEL: sext_v8i8_v8i64:
215 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
216 ; VBITS_GE_512-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, z0.b
217 ; VBITS_GE_512-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
218 ; VBITS_GE_512-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
219 ; VBITS_GE_512-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
220 ; VBITS_GE_512-NEXT: ret
221 %b = sext <8 x i8> %a to <8 x i64>
222 store <8 x i64>%b, <8 x i64>* %out
; Sign-extend a by-value <16 x i8> argument to <16 x i64> and store it to %out.
; The three-step sunpklo chain and st1d (vl16) are verified only by the
; invocations with a minimum vector width of 1024 bits or more.
226 define void @sext_v16i8_v16i64(<16 x i8> %a, <16 x i64>* %out) #0 {
227 ; CHECK-LABEL: sext_v16i8_v16i64:
228 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
229 ; VBITS_GE_1024-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, z0.b
230 ; VBITS_GE_1024-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
231 ; VBITS_GE_1024-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
232 ; VBITS_GE_1024-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
233 ; VBITS_GE_1024-NEXT: ret
234 %b = sext <16 x i8> %a to <16 x i64>
235 store <16 x i64> %b, <16 x i64>* %out
; Load <32 x i8> from %in, double it with an add, sign-extend to <32 x i64>
; and store to %out. The three-step unpack sequence is verified only by the
; 2048-bit invocation.
239 define void @sext_v32i8_v32i64(<32 x i8>* %in, <32 x i64>* %out) #0 {
240 ; CHECK-LABEL: sext_v32i8_v32i64:
241 ; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
242 ; VBITS_GE_2048-NEXT: sunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
243 ; VBITS_GE_2048-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
244 ; VBITS_GE_2048-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
245 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].d, vl32
246 ; VBITS_GE_2048-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
247 ; VBITS_GE_2048-NEXT: ret
248 %a = load <32 x i8>, <32 x i8>* %in
249 %b = add <32 x i8> %a, %a
250 %c = sext <32 x i8> %b to <32 x i64>
251 store <32 x i64> %c, <32 x i64>* %out
; Sign-extend a by-value <8 x i16> argument to <8 x i32> and store it to %out.
; Expected lowering: a single sunpklo to words and a predicated st1w (vl8).
259 define void @sext_v8i16_v8i32(<8 x i16> %a, <8 x i32>* %out) #0 {
260 ; CHECK-LABEL: sext_v8i16_v8i32:
261 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
262 ; CHECK-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, z0.h
263 ; CHECK-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
265 %b = sext <8 x i16> %a to <8 x i32>
266 store <8 x i32>%b, <8 x i32>* %out
; Load <16 x i16> from %in, double it with an add, sign-extend to <16 x i32>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 512 bits or more.
270 define void @sext_v16i16_v16i32(<16 x i16>* %in, <16 x i32>* %out) #0 {
271 ; CHECK-LABEL: sext_v16i16_v16i32:
272 ; VBITS_GE_512: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
273 ; VBITS_GE_512-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
274 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].s, vl16
275 ; VBITS_GE_512-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
276 ; VBITS_GE_512-NEXT: ret
277 %a = load <16 x i16>, <16 x i16>* %in
278 %b = add <16 x i16> %a, %a
279 %c = sext <16 x i16> %b to <16 x i32>
280 store <16 x i32> %c, <16 x i32>* %out
; Load <32 x i16> from %in, double it with an add, sign-extend to <32 x i32>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
284 define void @sext_v32i16_v32i32(<32 x i16>* %in, <32 x i32>* %out) #0 {
285 ; CHECK-LABEL: sext_v32i16_v32i32:
286 ; VBITS_GE_1024: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
287 ; VBITS_GE_1024-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
288 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].s, vl32
289 ; VBITS_GE_1024-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
290 ; VBITS_GE_1024-NEXT: ret
291 %a = load <32 x i16>, <32 x i16>* %in
292 %b = add <32 x i16> %a, %a
293 %c = sext <32 x i16> %b to <32 x i32>
294 store <32 x i32> %c, <32 x i32>* %out
; Load <64 x i16> from %in, double it with an add, sign-extend to <64 x i32>
; and store to %out. The instruction sequence is verified only by the
; 2048-bit invocation.
298 define void @sext_v64i16_v64i32(<64 x i16>* %in, <64 x i32>* %out) #0 {
299 ; CHECK-LABEL: sext_v64i16_v64i32:
300 ; VBITS_GE_2048: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
301 ; VBITS_GE_2048-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
302 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].s, vl64
303 ; VBITS_GE_2048-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
304 ; VBITS_GE_2048-NEXT: ret
305 %a = load <64 x i16>, <64 x i16>* %in
306 %b = add <64 x i16> %a, %a
307 %c = sext <64 x i16> %b to <64 x i32>
308 store <64 x i32> %c, <64 x i32>* %out
; Sign-extend a by-value <4 x i16> argument to <4 x i64> and store it to %out.
; Expected lowering: sunpklo to words, sunpklo to doublewords, then a
; predicated st1d (vl4).
316 define void @sext_v4i16_v4i64(<4 x i16> %a, <4 x i64>* %out) #0 {
317 ; CHECK-LABEL: sext_v4i16_v4i64:
318 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
319 ; CHECK-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, z0.h
320 ; CHECK-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
321 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
323 %b = sext <4 x i16> %a to <4 x i64>
324 store <4 x i64>%b, <4 x i64>* %out
; Sign-extend a by-value <8 x i16> argument to <8 x i64> and store it to %out.
; The two-step sunpklo chain and st1d (vl8) are verified only by the
; invocations with a minimum vector width of 512 bits or more.
328 define void @sext_v8i16_v8i64(<8 x i16> %a, <8 x i64>* %out) #0 {
329 ; CHECK-LABEL: sext_v8i16_v8i64:
330 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
331 ; VBITS_GE_512-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, z0.h
332 ; VBITS_GE_512-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
333 ; VBITS_GE_512-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
334 ; VBITS_GE_512-NEXT: ret
335 %b = sext <8 x i16> %a to <8 x i64>
336 store <8 x i64>%b, <8 x i64>* %out
; Load <16 x i16> from %in, double it with an add, sign-extend to <16 x i64>
; and store to %out. The two-step unpack sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
340 define void @sext_v16i16_v16i64(<16 x i16>* %in, <16 x i64>* %out) #0 {
341 ; CHECK-LABEL: sext_v16i16_v16i64:
342 ; VBITS_GE_1024: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
343 ; VBITS_GE_1024-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
344 ; VBITS_GE_1024-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
345 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].d, vl16
346 ; VBITS_GE_1024-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
347 ; VBITS_GE_1024-NEXT: ret
348 %a = load <16 x i16>, <16 x i16>* %in
349 %b = add <16 x i16> %a, %a
350 %c = sext <16 x i16> %b to <16 x i64>
351 store <16 x i64> %c, <16 x i64>* %out
; Load <32 x i16> from %in, double it with an add, sign-extend to <32 x i64>
; and store to %out. The two-step unpack sequence is verified only by the
; 2048-bit invocation.
355 define void @sext_v32i16_v32i64(<32 x i16>* %in, <32 x i64>* %out) #0 {
356 ; CHECK-LABEL: sext_v32i16_v32i64:
357 ; VBITS_GE_2048: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
358 ; VBITS_GE_2048-NEXT: sunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
359 ; VBITS_GE_2048-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
360 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].d, vl32
361 ; VBITS_GE_2048-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
362 ; VBITS_GE_2048-NEXT: ret
363 %a = load <32 x i16>, <32 x i16>* %in
364 %b = add <32 x i16> %a, %a
365 %c = sext <32 x i16> %b to <32 x i64>
366 store <32 x i64> %c, <32 x i64>* %out
; Sign-extend a by-value <4 x i32> argument to <4 x i64> and store it to %out.
; Expected lowering: a single sunpklo to doublewords and a predicated st1d (vl4).
374 define void @sext_v4i32_v4i64(<4 x i32> %a, <4 x i64>* %out) #0 {
375 ; CHECK-LABEL: sext_v4i32_v4i64:
376 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
377 ; CHECK-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, z0.s
378 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
380 %b = sext <4 x i32> %a to <4 x i64>
381 store <4 x i64>%b, <4 x i64>* %out
; Load <8 x i32> from %in, double it with an add, sign-extend to <8 x i64>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 512 bits or more.
385 define void @sext_v8i32_v8i64(<8 x i32>* %in, <8 x i64>* %out) #0 {
386 ; CHECK-LABEL: sext_v8i32_v8i64:
387 ; VBITS_GE_512: add [[A_WORDS:z[0-9]+]].s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
388 ; VBITS_GE_512-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
389 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
390 ; VBITS_GE_512-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
391 ; VBITS_GE_512-NEXT: ret
392 %a = load <8 x i32>, <8 x i32>* %in
393 %b = add <8 x i32> %a, %a
394 %c = sext <8 x i32> %b to <8 x i64>
395 store <8 x i64> %c, <8 x i64>* %out
; Load <16 x i32> from %in, double it with an add, sign-extend to <16 x i64>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
399 define void @sext_v16i32_v16i64(<16 x i32>* %in, <16 x i64>* %out) #0 {
400 ; CHECK-LABEL: sext_v16i32_v16i64:
401 ; VBITS_GE_1024: add [[A_WORDS:z[0-9]+]].s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
402 ; VBITS_GE_1024-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
403 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].d, vl16
404 ; VBITS_GE_1024-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
405 ; VBITS_GE_1024-NEXT: ret
406 %a = load <16 x i32>, <16 x i32>* %in
407 %b = add <16 x i32> %a, %a
408 %c = sext <16 x i32> %b to <16 x i64>
409 store <16 x i64> %c, <16 x i64>* %out
; Load <32 x i32> from %in, double it with an add, sign-extend to <32 x i64>
; and store to %out. The instruction sequence is verified only by the
; 2048-bit invocation.
413 define void @sext_v32i32_v32i64(<32 x i32>* %in, <32 x i64>* %out) #0 {
414 ; CHECK-LABEL: sext_v32i32_v32i64:
415 ; VBITS_GE_2048: add [[A_WORDS:z[0-9]+]].s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
416 ; VBITS_GE_2048-NEXT: sunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
417 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].d, vl32
418 ; VBITS_GE_2048-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
419 ; VBITS_GE_2048-NEXT: ret
420 %a = load <32 x i32>, <32 x i32>* %in
421 %b = add <32 x i32> %a, %a
422 %c = sext <32 x i32> %b to <32 x i64>
423 store <32 x i64> %c, <32 x i64>* %out
; Zero-extend a by-value <16 x i8> argument to <16 x i16> and store it to %out.
; Expected lowering: a single uunpklo to halfwords and a predicated st1h (vl16).
431 define void @zext_v16i8_v16i16(<16 x i8> %a, <16 x i16>* %out) #0 {
432 ; CHECK-LABEL: zext_v16i8_v16i16:
433 ; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
434 ; CHECK-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
435 ; CHECK-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x0]
437 %b = zext <16 x i8> %a to <16 x i16>
438 store <16 x i16>%b, <16 x i16>* %out
442 ; NOTE: Extra 'add' is to prevent the extend being combined with the load.
; Load <32 x i8> from %in, double it with an add, zero-extend to <32 x i16>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 512 bits or more.
443 define void @zext_v32i8_v32i16(<32 x i8>* %in, <32 x i16>* %out) #0 {
444 ; CHECK-LABEL: zext_v32i8_v32i16:
445 ; VBITS_GE_512: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
446 ; VBITS_GE_512-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
447 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].h, vl32
448 ; VBITS_GE_512-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
449 ; VBITS_GE_512-NEXT: ret
450 %a = load <32 x i8>, <32 x i8>* %in
451 %b = add <32 x i8> %a, %a
452 %c = zext <32 x i8> %b to <32 x i16>
453 store <32 x i16> %c, <32 x i16>* %out
; Load <64 x i8> from %in, double it with an add, zero-extend to <64 x i16>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
457 define void @zext_v64i8_v64i16(<64 x i8>* %in, <64 x i16>* %out) #0 {
458 ; CHECK-LABEL: zext_v64i8_v64i16:
459 ; VBITS_GE_1024: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
460 ; VBITS_GE_1024-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
461 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].h, vl64
462 ; VBITS_GE_1024-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
463 ; VBITS_GE_1024-NEXT: ret
464 %a = load <64 x i8>, <64 x i8>* %in
465 %b = add <64 x i8> %a, %a
466 %c = zext <64 x i8> %b to <64 x i16>
467 store <64 x i16> %c, <64 x i16>* %out
; Load <128 x i8> from %in, double it with an add, zero-extend to <128 x i16>
; and store to %out. The instruction sequence is verified only by the
; 2048-bit invocation.
471 define void @zext_v128i8_v128i16(<128 x i8>* %in, <128 x i16>* %out) #0 {
472 ; CHECK-LABEL: zext_v128i8_v128i16:
473 ; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
474 ; VBITS_GE_2048-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
475 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].h, vl128
476 ; VBITS_GE_2048-NEXT: st1h { [[A_HALFS]].h }, [[PG]], [x1]
477 ; VBITS_GE_2048-NEXT: ret
478 %a = load <128 x i8>, <128 x i8>* %in
479 %b = add <128 x i8> %a, %a
480 %c = zext <128 x i8> %b to <128 x i16>
481 store <128 x i16> %c, <128 x i16>* %out
; Zero-extend a by-value <8 x i8> argument to <8 x i32> and store it to %out.
; Expected lowering: uunpklo to halfwords, uunpklo to words, then a predicated
; st1w (vl8). The store is matched against the word register defined by the
; second unpack rather than the intermediate halfword register, so the test
; does not depend on both captures landing in the same register.
489 define void @zext_v8i8_v8i32(<8 x i8> %a, <8 x i32>* %out) #0 {
490 ; CHECK-LABEL: zext_v8i8_v8i32:
491 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
492 ; CHECK-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
493 ; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
494 ; CHECK-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
496 %b = zext <8 x i8> %a to <8 x i32>
497 store <8 x i32>%b, <8 x i32>* %out
; Zero-extend a by-value <16 x i8> argument to <16 x i32> and store it to %out.
; At 512 bits or more a single unpack chain and one st1w (vl16) is expected.
; At exactly 256 bits the result does not fit one register: the high eight
; bytes are extracted with ext, each half is unpacked separately, and two
; st1w (vl8) stores are expected, the second at an element offset of 8.
501 define void @zext_v16i8_v16i32(<16 x i8> %a, <16 x i32>* %out) #0 {
502 ; CHECK-LABEL: zext_v16i8_v16i32:
503 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
504 ; VBITS_GE_512-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
505 ; VBITS_GE_512-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
506 ; VBITS_GE_512-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
507 ; VBITS_GE_512-NEXT: ret
509 ; Ensure sensible type legalisation.
510 ; VBITS_EQ_256-DAG: ext v[[A_HI:[0-9]+]].16b, v0.16b, v0.16b, #8
511 ; VBITS_EQ_256-DAG: uunpklo [[A_HALFS_LO:z[0-9]+]].h, z0.b
512 ; VBITS_EQ_256-DAG: uunpklo [[A_HALFS_HI:z[0-9]+]].h, z[[A_HI]].b
513 ; VBITS_EQ_256-DAG: uunpklo [[A_WORDS_LO:z[0-9]+]].s, [[A_HALFS_LO]].h
514 ; VBITS_EQ_256-DAG: uunpklo [[A_WORDS_HI:z[0-9]+]].s, [[A_HALFS_HI]].h
515 ; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
516 ; VBITS_EQ_256-DAG: mov x[[OUT_HI:[0-9]+]], #8
517 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_LO]].s }, [[PG]], [x0]
518 ; VBITS_EQ_256-DAG: st1w { [[A_WORDS_HI]].s }, [[PG]], [x0, x[[OUT_HI]], lsl #2]
519 ; VBITS_EQ_256-NEXT: ret
520 %b = zext <16 x i8> %a to <16 x i32>
521 store <16 x i32> %b, <16 x i32>* %out
; Load <32 x i8> from %in, double it with an add, zero-extend to <32 x i32>
; and store to %out. The two-step unpack sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
525 define void @zext_v32i8_v32i32(<32 x i8>* %in, <32 x i32>* %out) #0 {
526 ; CHECK-LABEL: zext_v32i8_v32i32:
527 ; VBITS_GE_1024: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
528 ; VBITS_GE_1024-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
529 ; VBITS_GE_1024-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
530 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].s, vl32
531 ; VBITS_GE_1024-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
532 ; VBITS_GE_1024-NEXT: ret
533 %a = load <32 x i8>, <32 x i8>* %in
534 %b = add <32 x i8> %a, %a
535 %c = zext <32 x i8> %b to <32 x i32>
536 store <32 x i32> %c, <32 x i32>* %out
; Load <64 x i8> from %in, double it with an add, zero-extend to <64 x i32>
; and store to %out. The two-step unpack sequence is verified only by the
; 2048-bit invocation.
540 define void @zext_v64i8_v64i32(<64 x i8>* %in, <64 x i32>* %out) #0 {
541 ; CHECK-LABEL: zext_v64i8_v64i32:
542 ; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
543 ; VBITS_GE_2048-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
544 ; VBITS_GE_2048-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
545 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].s, vl64
546 ; VBITS_GE_2048-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
547 ; VBITS_GE_2048-NEXT: ret
548 %a = load <64 x i8>, <64 x i8>* %in
549 %b = add <64 x i8> %a, %a
550 %c = zext <64 x i8> %b to <64 x i32>
551 store <64 x i32> %c, <64 x i32>* %out
559 ; NOTE: v4i8 is an unpacked type stored within a v4i16 container. The zero
560 ; extend is a two step process where the container is zero_extend_inreg'd with
561 ; the result feeding a normal zero extend from halfs to doublewords.
; Zero-extend a by-value <4 x i8> argument to <4 x i64> and store it to %out.
; Expected lowering: a NEON bic clears the upper byte of each halfword lane,
; then two unsigned unpacks widen to doublewords before a predicated st1d (vl4).
562 define void @zext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) #0 {
563 ; CHECK-LABEL: zext_v4i8_v4i64:
564 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
565 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
566 ; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, z0.h
567 ; CHECK-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
568 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
570 %b = zext <4 x i8> %a to <4 x i64>
571 store <4 x i64>%b, <4 x i64>* %out
; Zero-extend a by-value <8 x i8> argument to <8 x i64> and store it to %out.
; The three-step uunpklo chain and st1d (vl8) are verified only by the
; invocations with a minimum vector width of 512 bits or more.
575 define void @zext_v8i8_v8i64(<8 x i8> %a, <8 x i64>* %out) #0 {
576 ; CHECK-LABEL: zext_v8i8_v8i64:
577 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
578 ; VBITS_GE_512-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
579 ; VBITS_GE_512-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
580 ; VBITS_GE_512-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
581 ; VBITS_GE_512-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
582 ; VBITS_GE_512-NEXT: ret
583 %b = zext <8 x i8> %a to <8 x i64>
584 store <8 x i64>%b, <8 x i64>* %out
; Zero-extend a by-value <16 x i8> argument to <16 x i64> and store it to %out.
; The three-step uunpklo chain and st1d (vl16) are verified only by the
; invocations with a minimum vector width of 1024 bits or more.
588 define void @zext_v16i8_v16i64(<16 x i8> %a, <16 x i64>* %out) #0 {
589 ; CHECK-LABEL: zext_v16i8_v16i64:
590 ; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
591 ; VBITS_GE_1024-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, z0.b
592 ; VBITS_GE_1024-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
593 ; VBITS_GE_1024-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
594 ; VBITS_GE_1024-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
595 ; VBITS_GE_1024-NEXT: ret
596 %b = zext <16 x i8> %a to <16 x i64>
597 store <16 x i64> %b, <16 x i64>* %out
; Load <32 x i8> from %in, double it with an add, zero-extend to <32 x i64>
; and store to %out. The three-step unpack sequence is verified only by the
; 2048-bit invocation.
601 define void @zext_v32i8_v32i64(<32 x i8>* %in, <32 x i64>* %out) #0 {
602 ; CHECK-LABEL: zext_v32i8_v32i64:
603 ; VBITS_GE_2048: add [[A_BYTES:z[0-9]+]].b, {{p[0-9]+}}/m, {{z[0-9]+}}.b, {{z[0-9]+}}.b
604 ; VBITS_GE_2048-NEXT: uunpklo [[A_HALFS:z[0-9]+]].h, [[A_BYTES]].b
605 ; VBITS_GE_2048-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
606 ; VBITS_GE_2048-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
607 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].d, vl32
608 ; VBITS_GE_2048-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
609 ; VBITS_GE_2048-NEXT: ret
610 %a = load <32 x i8>, <32 x i8>* %in
611 %b = add <32 x i8> %a, %a
612 %c = zext <32 x i8> %b to <32 x i64>
613 store <32 x i64> %c, <32 x i64>* %out
; Zero-extend a by-value <8 x i16> argument to <8 x i32> and store it to %out.
; Expected lowering: a single uunpklo to words and a predicated st1w (vl8).
621 define void @zext_v8i16_v8i32(<8 x i16> %a, <8 x i32>* %out) #0 {
622 ; CHECK-LABEL: zext_v8i16_v8i32:
623 ; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
624 ; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, z0.h
625 ; CHECK-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x0]
627 %b = zext <8 x i16> %a to <8 x i32>
628 store <8 x i32>%b, <8 x i32>* %out
; Load <16 x i16> from %in, double it with an add, zero-extend to <16 x i32>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 512 bits or more.
632 define void @zext_v16i16_v16i32(<16 x i16>* %in, <16 x i32>* %out) #0 {
633 ; CHECK-LABEL: zext_v16i16_v16i32:
634 ; VBITS_GE_512: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
635 ; VBITS_GE_512-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
636 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].s, vl16
637 ; VBITS_GE_512-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
638 ; VBITS_GE_512-NEXT: ret
639 %a = load <16 x i16>, <16 x i16>* %in
640 %b = add <16 x i16> %a, %a
641 %c = zext <16 x i16> %b to <16 x i32>
642 store <16 x i32> %c, <16 x i32>* %out
; Load <32 x i16> from %in, double it with an add, zero-extend to <32 x i32>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
646 define void @zext_v32i16_v32i32(<32 x i16>* %in, <32 x i32>* %out) #0 {
647 ; CHECK-LABEL: zext_v32i16_v32i32:
648 ; VBITS_GE_1024: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
649 ; VBITS_GE_1024-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
650 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].s, vl32
651 ; VBITS_GE_1024-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
652 ; VBITS_GE_1024-NEXT: ret
653 %a = load <32 x i16>, <32 x i16>* %in
654 %b = add <32 x i16> %a, %a
655 %c = zext <32 x i16> %b to <32 x i32>
656 store <32 x i32> %c, <32 x i32>* %out
; Load <64 x i16> from %in, double it with an add, zero-extend to <64 x i32>
; and store to %out. The instruction sequence is verified only by the
; 2048-bit invocation.
660 define void @zext_v64i16_v64i32(<64 x i16>* %in, <64 x i32>* %out) #0 {
661 ; CHECK-LABEL: zext_v64i16_v64i32:
662 ; VBITS_GE_2048: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
663 ; VBITS_GE_2048-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
664 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].s, vl64
665 ; VBITS_GE_2048-NEXT: st1w { [[A_WORDS]].s }, [[PG]], [x1]
666 ; VBITS_GE_2048-NEXT: ret
667 %a = load <64 x i16>, <64 x i16>* %in
668 %b = add <64 x i16> %a, %a
669 %c = zext <64 x i16> %b to <64 x i32>
670 store <64 x i32> %c, <64 x i32>* %out
; Zero-extend a by-value <4 x i16> argument to <4 x i64> and store it to %out.
; Expected lowering: uunpklo to words, uunpklo to doublewords, then a
; predicated st1d (vl4).
678 define void @zext_v4i16_v4i64(<4 x i16> %a, <4 x i64>* %out) #0 {
679 ; CHECK-LABEL: zext_v4i16_v4i64:
680 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
681 ; CHECK-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, z0.h
682 ; CHECK-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
683 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
685 %b = zext <4 x i16> %a to <4 x i64>
686 store <4 x i64>%b, <4 x i64>* %out
; Zero-extend a by-value <8 x i16> argument to <8 x i64> and store it to %out.
; The two-step uunpklo chain and st1d (vl8) are verified only by the
; invocations with a minimum vector width of 512 bits or more.
690 define void @zext_v8i16_v8i64(<8 x i16> %a, <8 x i64>* %out) #0 {
691 ; CHECK-LABEL: zext_v8i16_v8i64:
692 ; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
693 ; VBITS_GE_512-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, z0.h
694 ; VBITS_GE_512-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
695 ; VBITS_GE_512-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
696 ; VBITS_GE_512-NEXT: ret
697 %b = zext <8 x i16> %a to <8 x i64>
698 store <8 x i64>%b, <8 x i64>* %out
; Load <16 x i16> from %in, double it with an add, zero-extend to <16 x i64>
; and store to %out. The two-step unpack sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
702 define void @zext_v16i16_v16i64(<16 x i16>* %in, <16 x i64>* %out) #0 {
703 ; CHECK-LABEL: zext_v16i16_v16i64:
704 ; VBITS_GE_1024: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
705 ; VBITS_GE_1024-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
706 ; VBITS_GE_1024-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
707 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].d, vl16
708 ; VBITS_GE_1024-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
709 ; VBITS_GE_1024-NEXT: ret
710 %a = load <16 x i16>, <16 x i16>* %in
711 %b = add <16 x i16> %a, %a
712 %c = zext <16 x i16> %b to <16 x i64>
713 store <16 x i64> %c, <16 x i64>* %out
; Load <32 x i16> from %in, double it with an add, zero-extend to <32 x i64>
; and store to %out. The two-step unpack sequence is verified only by the
; 2048-bit invocation.
717 define void @zext_v32i16_v32i64(<32 x i16>* %in, <32 x i64>* %out) #0 {
718 ; CHECK-LABEL: zext_v32i16_v32i64:
719 ; VBITS_GE_2048: add [[A_HALFS:z[0-9]+]].h, {{p[0-9]+}}/m, {{z[0-9]+}}.h, {{z[0-9]+}}.h
720 ; VBITS_GE_2048-NEXT: uunpklo [[A_WORDS:z[0-9]+]].s, [[A_HALFS]].h
721 ; VBITS_GE_2048-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
722 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].d, vl32
723 ; VBITS_GE_2048-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
724 ; VBITS_GE_2048-NEXT: ret
725 %a = load <32 x i16>, <32 x i16>* %in
726 %b = add <32 x i16> %a, %a
727 %c = zext <32 x i16> %b to <32 x i64>
728 store <32 x i64> %c, <32 x i64>* %out
; Zero-extend a by-value <4 x i32> argument to <4 x i64> and store it to %out.
; Expected lowering: a single uunpklo to doublewords and a predicated st1d (vl4).
736 define void @zext_v4i32_v4i64(<4 x i32> %a, <4 x i64>* %out) #0 {
737 ; CHECK-LABEL: zext_v4i32_v4i64:
738 ; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
739 ; CHECK-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, z0.s
740 ; CHECK-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x0]
742 %b = zext <4 x i32> %a to <4 x i64>
743 store <4 x i64>%b, <4 x i64>* %out
; Load <8 x i32> from %in, double it with an add, zero-extend to <8 x i64>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 512 bits or more.
747 define void @zext_v8i32_v8i64(<8 x i32>* %in, <8 x i64>* %out) #0 {
748 ; CHECK-LABEL: zext_v8i32_v8i64:
749 ; VBITS_GE_512: add [[A_WORDS:z[0-9]+]].s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
750 ; VBITS_GE_512-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
751 ; VBITS_GE_512-NEXT: ptrue [[PG:p[0-9]+]].d, vl8
752 ; VBITS_GE_512-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
753 ; VBITS_GE_512-NEXT: ret
754 %a = load <8 x i32>, <8 x i32>* %in
755 %b = add <8 x i32> %a, %a
756 %c = zext <8 x i32> %b to <8 x i64>
757 store <8 x i64> %c, <8 x i64>* %out
; Load <16 x i32> from %in, double it with an add, zero-extend to <16 x i64>
; and store to %out. The instruction sequence is verified only by the
; invocations with a minimum vector width of 1024 bits or more.
761 define void @zext_v16i32_v16i64(<16 x i32>* %in, <16 x i64>* %out) #0 {
762 ; CHECK-LABEL: zext_v16i32_v16i64:
763 ; VBITS_GE_1024: add [[A_WORDS:z[0-9]+]].s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
764 ; VBITS_GE_1024-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
765 ; VBITS_GE_1024-NEXT: ptrue [[PG:p[0-9]+]].d, vl16
766 ; VBITS_GE_1024-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
767 ; VBITS_GE_1024-NEXT: ret
768 %a = load <16 x i32>, <16 x i32>* %in
769 %b = add <16 x i32> %a, %a
770 %c = zext <16 x i32> %b to <16 x i64>
771 store <16 x i64> %c, <16 x i64>* %out
; Load <32 x i32> from %in, double it with an add, zero-extend to <32 x i64>
; and store to %out. The instruction sequence is verified only by the
; 2048-bit invocation.
775 define void @zext_v32i32_v32i64(<32 x i32>* %in, <32 x i64>* %out) #0 {
776 ; CHECK-LABEL: zext_v32i32_v32i64:
777 ; VBITS_GE_2048: add [[A_WORDS:z[0-9]+]].s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
778 ; VBITS_GE_2048-NEXT: uunpklo [[A_DWORDS:z[0-9]+]].d, [[A_WORDS]].s
779 ; VBITS_GE_2048-NEXT: ptrue [[PG:p[0-9]+]].d, vl32
780 ; VBITS_GE_2048-NEXT: st1d { [[A_DWORDS]].d }, [[PG]], [x1]
781 ; VBITS_GE_2048-NEXT: ret
782 %a = load <32 x i32>, <32 x i32>* %in
783 %b = add <32 x i32> %a, %a
784 %c = zext <32 x i32> %b to <32 x i64>
785 store <32 x i64> %c, <32 x i64>* %out
; Attribute group referenced as #0 by every function in this file: marks the
; function nounwind and enables the SVE target feature for it.
789 attributes #0 = { nounwind "target-features"="+sve" }