1 ; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
3 ; Most of these tests create a vector tuple, insert z5 into one of the elements,
4 ; and finally extract an element from the wide vector to return it. Unless a test
5 ; is named *_ret_elt* or get_tuple*, the checks ensure that z5 is the value returned.
8 ; Insert into two element tuples
11 ; tuple: { tuple2.res0, tuple2.res1 }
12 ; insert z5: { z5 , tuple2.res1 }
; Builds a two-element tuple from %z0 and %z1, overwrites element 0 with %z5 and
; reads element 0 back. %z2-%z4 are not used by the body. The check line expects
; a single move of z5 into the return register z0.
14 define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
15 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
16 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
17 ; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
18 ; CHECK-NEXT: mov z0.d, z5.d
20 %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
21 %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
22 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
23 ret <vscale x 4 x i32> %ext
26 ; tuple: { tuple2.res0, tuple2.res1 }
27 ; insert z5: { tuple2.res0, z5 }
; Builds a two-element tuple from %z0 and %z1, overwrites element 1 with %z5 and
; reads element 1 back. %z2-%z4 are not used by the body. The check line expects
; a single move of z5 into the return register z0.
29 define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
30 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
31 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
32 ; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
33 ; CHECK-NEXT: mov z0.d, z5.d
35 %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
36 %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
37 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
38 ret <vscale x 4 x i32> %ext
41 ; This test checks the elements _not_ being set aren't changed.
43 ; tuple: { tuple2.res0, tuple2.res1 }
44 ; insert z5: { tuple2.res0, z5 }
; Overwrites element 1 of the { %z0, %z1 } tuple with %z5 but returns element 0,
; i.e. the element that was not written. No register move is checked for here,
; presumably because %z0 already sits in the return register z0 (confirm against
; the full file, since only the label check is visible in this excerpt).
46 define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
47 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
48 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
49 ; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
51 %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
52 %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
53 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
54 ret <vscale x 4 x i32> %ext
57 ; Test extract of tuple passed into function
; Takes a two-element tuple as its only argument and returns element 1. The
; check line expects that element to be moved from z1 into the return register z0.
58 define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
59 ; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
60 ; CHECK-NEXT: mov z0.d, z1.d
62 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
63 ret <vscale x 4 x i32> %ext
67 ; Insert into three element tuples
70 ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
71 ; insert z5: { z5 , tuple3.res1, tuple3.res2 }
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 0 with
; %z5 and reads element 0 back. %z3 and %z4 are not used by the body. The check
; line expects a single move of z5 into the return register z0.
73 define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
74 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
75 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
76 ; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
77 ; CHECK-NEXT: mov z0.d, z5.d
79 %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
80 %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
81 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
82 ret <vscale x 4 x i32> %ext
85 ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
86 ; insert z5: { tuple3.res0, z5 , tuple3.res2 }
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 1 with
; %z5 and reads element 1 back. %z3 and %z4 are not used by the body. The check
; line expects a single move of z5 into the return register z0.
88 define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
89 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
90 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
91 ; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
92 ; CHECK-NEXT: mov z0.d, z5.d
94 %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
95 %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
96 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
97 ret <vscale x 4 x i32> %ext
100 ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
101 ; insert z5: { tuple3.res0, tuple3.res1, z5 }
; Builds a three-element tuple from %z0, %z1 and %z2, overwrites element 2 with
; %z5 and reads element 2 back. %z3 and %z4 are not used by the body. The check
; line expects a single move of z5 into the return register z0.
103 define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
104 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
105 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
106 ; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
107 ; CHECK-NEXT: mov z0.d, z5.d
109 %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
110 %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
111 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
112 ret <vscale x 4 x i32> %ext
115 ; This test checks the elements _not_ being set aren't changed.
117 ; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
118 ; insert z5: { tuple3.res0, z5 , tuple3.res2 }
; Overwrites element 1 of the { %z0, %z1, %z2 } tuple with %z5 but returns
; element 2, i.e. an element that was not written. The check line therefore
; expects the original third element (z2), not z5, to be moved into z0.
120 define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
121 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
122 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
123 ; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
124 ; CHECK-NEXT: mov z0.d, z2.d
126 %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
127 %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
128 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
129 ret <vscale x 4 x i32> %ext
132 ; Test extract of tuple passed into function
; Takes an unused leading vector argument %z0 followed by a three-element tuple
; and returns tuple element 2. The check line expects that element to be moved
; from z3 into z0, which is consistent with the tuple following %z0 in z1-z3.
133 define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
134 ; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
135 ; CHECK-NEXT: mov z0.d, z3.d
137 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
138 ret <vscale x 4 x i32> %ext
142 ; Insert into four element tuples
145 ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
146 ; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 }
; Builds a four-element tuple from %z0-%z3, overwrites element 0 with %z5 and
; reads element 0 back. %z4 is not used by the body. The check line expects a
; single move of z5 into the return register z0.
148 define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
149 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
150 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
151 ; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
152 ; CHECK-NEXT: mov z0.d, z5.d
154 %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
155 %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
156 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
157 ret <vscale x 4 x i32> %ext
160 ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
161 ; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 }
; Builds a four-element tuple from %z0-%z3, overwrites element 1 with %z5 and
; reads element 1 back. %z4 is not used by the body. The check line expects a
; single move of z5 into the return register z0.
163 define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
164 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
165 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
166 ; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
167 ; CHECK-NEXT: mov z0.d, z5.d
169 %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
170 %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
171 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
172 ret <vscale x 4 x i32> %ext
175 ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
176 ; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 }
; Builds a four-element tuple from %z0-%z3, overwrites element 2 with %z5 and
; reads element 2 back. %z4 is not used by the body. The check line expects a
; single move of z5 into the return register z0.
178 define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
179 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
180 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
181 ; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
182 ; CHECK-NEXT: mov z0.d, z5.d
184 %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
185 %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
186 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
187 ret <vscale x 4 x i32> %ext
190 ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
191 ; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 }
; Builds a four-element tuple from %z0-%z3, overwrites element 3 with %z5 and
; reads element 3 back. %z4 is not used by the body. The check line expects a
; single move of z5 into the return register z0.
193 define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
194 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
195 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
196 ; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
197 ; CHECK-NEXT: mov z0.d, z5.d
199 %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
200 %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
201 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
202 ret <vscale x 4 x i32> %ext
205 ; This test checks the elements _not_ being set aren't changed.
207 ; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
208 ; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 }
; Overwrites element 3 of the { %z0, %z1, %z2, %z3 } tuple with %z5 but returns
; element 2, i.e. an element that was not written. The check line therefore
; expects the original third element (z2), not z5, to be moved into z0.
210 define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
211 <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
212 <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
213 ; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
214 ; CHECK-NEXT: mov z0.d, z2.d
216 %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
217 %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
218 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
219 ret <vscale x 4 x i32> %ext
222 ; Test extract of tuple passed into function
; Takes a four-element tuple as its only argument and returns element 3. The
; check line expects that element to be moved from z3 into the return register z0.
223 define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
224 ; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
225 ; CHECK-NEXT: mov z0.d, z3.d
227 %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
228 ret <vscale x 4 x i32> %ext
; Attribute group #0, referenced by the function definitions above: marks them
; nounwind and enables the SVE target feature per function.
231 attributes #0 = { nounwind "target-features"="+sve" }
; Two-element tuple intrinsics: create from two nxv4i32 parts, set one part by
; index, get one part by index.
233 declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
234 declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
235 declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
; Three-element tuple intrinsics (nxv12i32 built from three nxv4i32 parts).
237 declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
238 declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
239 declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
; Four-element tuple intrinsics (nxv16i32 built from four nxv4i32 parts).
241 declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
242 declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
243 declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)