1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s | FileCheck %s
3 target triple = "aarch64-unknown-linux-gnu"
; Positive test: both ptrues use the same pattern (i32 10 == vl32), so the
; cmpeq+extract+sext+cmpne sequence folds to a single punpklo with no extra and.
define <vscale x 8 x i1> @masked_load_sext_i8i16(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}
21 ; This negative test ensures the two ptrues have the same vl
define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}
40 ; This negative test enforces that both predicates are ptrues
define <vscale x 8 x i1> @masked_load_sext_i8i16_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}
; Positive test: matching vl32 ptrues; the i8->i32 widening folds to two
; punpklo steps with no extra and.
define <vscale x 4 x i1> @masked_load_sext_i8i32(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}
74 ; This negative test ensures the two ptrues have the same vl
define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}
94 ; This negative test enforces that both predicates are ptrues
define <vscale x 4 x i1> @masked_load_sext_i8i32_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}
; Positive test: matching vl32 ptrues; the i8->i64 widening folds to three
; punpklo steps with no extra and.
define <vscale x 2 x i1> @masked_load_sext_i8i64(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}
130 ; This negative test ensures the two ptrues have the same vl
define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_vl(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_vl:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}
151 ; This negative test enforces that both predicates are ptrues
define <vscale x 2 x i1> @masked_load_sext_i8i64_parg(ptr %ap, <vscale x 16 x i8> %b, <vscale x 16 x i1> %p0) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_parg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 10)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}
170 ; This negative test enforces that the ptrues have a specified vl
define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i16_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.h, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 8 x i1> %extract to <vscale x 8 x i16>
  %p1 = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 10)
  %cmp1 = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1> %p1, <vscale x 8 x i16> %ext1, <vscale x 8 x i16> zeroinitializer)
  ret <vscale x 8 x i1> %cmp1
}
189 ; This negative test enforces that the ptrues have a specified vl
define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i32_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 4 x i1> %extract to <vscale x 4 x i32>
  %p1 = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 10)
  %cmp1 = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1> %p1, <vscale x 4 x i32> %ext1, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i1> %cmp1
}
209 ; This negative test enforces that the ptrues have a specified vl
define <vscale x 2 x i1> @masked_load_sext_i8i64_ptrue_all(ptr %ap, <vscale x 16 x i8> %b) #0 {
; CHECK-LABEL: masked_load_sext_i8i64_ptrue_all:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    punpklo p0.h, p0.b
; CHECK-NEXT:    ret
  %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %cmp = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %p0, <vscale x 16 x i8> %b, <vscale x 16 x i8> zeroinitializer)
  %extract = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %cmp, i64 0)
  %ext1 = sext <vscale x 2 x i1> %extract to <vscale x 2 x i64>
  %p1 = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %cmp1 = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> %p1, <vscale x 2 x i64> %ext1, <vscale x 2 x i64> zeroinitializer)
  ret <vscale x 2 x i1> %cmp1
}
228 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
230 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
231 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
232 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
233 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
235 declare <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
236 declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
237 declare <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
239 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
240 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
241 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
243 attributes #0 = { "target-features"="+sve" }