; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
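
; These tests check that loads combined with a sign or zero extend, where the
; extended type is too wide for a single Z register, are split into multiple
; predicated ld1[s]b/ld1[s]h/ld1[s]w loads using the "mul vl" immediate
; addressing mode.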
define <vscale x 16 x i32> @ld1b_i8_sext_i32(ptr %base) {
; CHECK-LABEL: ld1b_i8_sext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = sext <vscale x 16 x i8> %wide.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

define <vscale x 16 x i32> @ld1b_i8_zext_i32(ptr %base) {
; CHECK-LABEL: ld1b_i8_zext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = zext <vscale x 16 x i8> %wide.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

define <vscale x 16 x i64> @ld1b_i8_sext(ptr %base) {
; CHECK-LABEL: ld1b_i8_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1sb { z4.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ld1sb { z5.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT:    ld1sb { z6.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    ld1sb { z7.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = sext <vscale x 16 x i8> %wide.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

define <vscale x 16 x i64> @ld1b_i8_zext(ptr %base) {
; CHECK-LABEL: ld1b_i8_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1b { z4.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ld1b { z5.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT:    ld1b { z6.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    ld1b { z7.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = zext <vscale x 16 x i8> %wide.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

define <vscale x 8 x i64> @ld1h_i16_sext(ptr %base) {
; CHECK-LABEL: ld1h_i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sh { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sh { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 8 x i16>, ptr %base
  %res = sext <vscale x 8 x i16> %wide.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

define <vscale x 8 x i64> @ld1h_i16_zext(ptr %base) {
; CHECK-LABEL: ld1h_i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1h { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 8 x i16>, ptr %base
  %res = zext <vscale x 8 x i16> %wide.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

define <vscale x 4 x i64> @ld1w_i32_sext(ptr %base) {
; CHECK-LABEL: ld1w_i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 4 x i32>, ptr %base
  %res = sext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

define <vscale x 4 x i64> @ld1w_i32_zext(ptr %base) {
; CHECK-LABEL: ld1w_i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 4 x i32>, ptr %base
  %res = zext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

;
; Extending loads from unpacked to wide illegal types
;
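; For example, a <vscale x 4 x i8> load zero-extended to <vscale x 4 x i64>
; produces a result spanning two Z registers, so it is split into two
; predicated loads that place each i8 element directly in a 64-bit container.
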
define <vscale x 4 x i64> @zload_4i8_4i64(ptr %a) {
; CHECK-LABEL: zload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i8>, ptr %a
  %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 4 x i64> @zload_4i16_4i64(ptr %a) {
; CHECK-LABEL: zload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i16>, ptr %a
  %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 8 x i32> @zload_8i8_8i32(ptr %a) {
; CHECK-LABEL: zload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

define <vscale x 8 x i64> @zload_8i8_8i64(ptr %a) {
; CHECK-LABEL: zload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}

define <vscale x 4 x i64> @sload_4i8_4i64(ptr %a) {
; CHECK-LABEL: sload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i8>, ptr %a
  %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 4 x i64> @sload_4i16_4i64(ptr %a) {
; CHECK-LABEL: sload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i16>, ptr %a
  %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 8 x i32> @sload_8i8_8i32(ptr %a) {
; CHECK-LABEL: sload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
; CHECK-LABEL: sload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}