; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
; Positive test (i16 -> i8): the add constant 32 equals 1 << (6 - 1), i.e. the
; rounding bias for a logical right shift by 6, so the add + lshr + trunc
; sequence is expected to be selected as a single RSHRNB #6.
define void @add_lshr_rshrnb_b_6(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_b_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.b, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}
; Negative test (i16 -> i8): the add constant is 1, not the rounding bias
; 1 << (6 - 1) = 32, so the sequence must not become RSHRNB; a separate
; ADD #1 followed by LSR #6 is expected.
define void @neg_add_lshr_rshrnb_b_6(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_add_lshr_rshrnb_b_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    add z0.h, z0.h, #1 // =0x1
; CHECK-NEXT:    lsr z0.h, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}
; Positive test (i16 -> i8): the add constant 64 equals 1 << (7 - 1), the
; rounding bias for a shift by 7, so a single RSHRNB #7 is expected.
define void @add_lshr_rshrnb_h_7(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_h_7:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.b, z0.h, #7
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 64, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 7, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}
; Positive test (i32 -> i16): the add constant 32 equals 1 << (6 - 1), so the
; add + lshr + trunc sequence is expected to be selected as RSHRNB #6 on
; .s source elements producing .h results.
define void @add_lshr_rshrn_h_6(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrn_h_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.h, z0.s, #6
; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 32, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 6, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}
; Positive test (i32 -> i16) with a small shift: the add constant 2 equals
; 1 << (2 - 1), the rounding bias for a shift by 2, so RSHRNB #2 is expected.
define void @add_lshr_rshrnb_h_2(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_h_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.h, z0.s, #2
; CHECK-NEXT:    st1h { z0.s }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}
; Negative test (i32 -> i16): the shift amount is the splat of -1 truncated to
; i32, which is not smaller than the element width. LangRef makes such an lshr
; result poison, so no RSHRNB may be formed.
; NOTE(review): the expected output below (nothing but the return) is restored
; from the standard autogenerated layout; regenerate with
; utils/update_llc_test_checks.py to confirm.
define void @neg_add_lshr_rshrnb_h_0(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_add_lshr_rshrnb_h_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x i32>, ptr %ptr, align 2
  %1 = add <vscale x 4 x i32> %load, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %2 = lshr <vscale x 4 x i32> %1, trunc (<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 -1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) to <vscale x 4 x i32>)
  %3 = trunc <vscale x 4 x i32> %2 to <vscale x 4 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 4 x i16> %3, ptr %4, align 1
  ret void
}
; Positive test on a wide input: %arg1 is <vscale x 16 x i16>, which the
; expected output handles as two registers (z0 and z1). Each half is expected
; to get its own RSHRNB #6 (add constant 32 == 1 << (6 - 1)); the halves are
; recombined with UZP1, added to the i8 vector loaded from %dest + %index, and
; stored back to the same address.
define void @wide_add_shift_add_rshrnb_b(ptr %dest, i64 %index, <vscale x 16 x i16> %arg1){
; CHECK-LABEL: wide_add_shift_add_rshrnb_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    rshrnb z1.b, z1.h, #6
; CHECK-NEXT:    rshrnb z0.b, z0.h, #6
; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x0, x1]
; CHECK-NEXT:    add z0.b, z1.b, z0.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT:    ret
  %1 = add <vscale x 16 x i16> %arg1, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 32, i64 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
  %2 = lshr <vscale x 16 x i16> %1, shufflevector (<vscale x 16 x i16> insertelement (<vscale x 16 x i16> poison, i16 6, i64 0), <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer)
  %3 = getelementptr inbounds i8, ptr %dest, i64 %index
  %load = load <vscale x 16 x i8>, ptr %3, align 2
  %4 = trunc <vscale x 16 x i16> %2 to <vscale x 16 x i8>
  %5 = add <vscale x 16 x i8> %load, %4
  store <vscale x 16 x i8> %5, ptr %3, align 2
  ret void
}
; Positive test on a wide input: %arg1 is <vscale x 8 x i32>, which the
; expected output handles as two registers (z0 and z1). Each half is expected
; to get its own RSHRNB #6 (add constant 32 == 1 << (6 - 1)); the halves are
; recombined with UZP1, added to the i16 vector loaded from %dest + %index,
; and stored back to the same address.
define void @wide_add_shift_add_rshrnb_h(ptr %dest, i64 %index, <vscale x 8 x i32> %arg1){
; CHECK-LABEL: wide_add_shift_add_rshrnb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    rshrnb z1.h, z1.s, #6
; CHECK-NEXT:    rshrnb z0.h, z0.s, #6
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %1 = add <vscale x 8 x i32> %arg1, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
  %2 = lshr <vscale x 8 x i32> %1, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
  %3 = getelementptr inbounds i16, ptr %dest, i64 %index
  %load = load <vscale x 8 x i16>, ptr %3, align 2
  %4 = trunc <vscale x 8 x i32> %2 to <vscale x 8 x i16>
  %5 = add <vscale x 8 x i16> %load, %4
  store <vscale x 8 x i16> %5, ptr %3, align 2
  ret void
}
; Negative test (i16 -> i8): the second add operand is the function argument
; %add_op1 rather than a constant splat, so it cannot be recognised as a
; rounding bias; a vector ADD followed by LSR #6 is expected.
define void @neg_trunc_lsr_add_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, <vscale x 8 x i16> %add_op1){
; CHECK-LABEL: neg_trunc_lsr_add_op1_not_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    lsr z0.h, z0.h, #6
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, %add_op1
  %2 = lshr <vscale x 8 x i16> %1, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 6, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}
; Negative test (i16 -> i8): the shift amount is the function argument
; %lshr_op1 rather than a constant splat, so there is no immediate to encode
; in RSHRNB; ADD #32 followed by a predicated reversed shift (LSRR) is
; expected.
define void @neg_trunc_lsr_op1_not_splat(ptr %ptr, ptr %dst, i64 %index, <vscale x 8 x i16> %lshr_op1){
; CHECK-LABEL: neg_trunc_lsr_op1_not_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    add z1.h, z1.h, #32 // =0x20
; CHECK-NEXT:    lsrr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x2]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 32, i64 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer)
  %2 = lshr <vscale x 8 x i16> %1, %lshr_op1
  %3 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %4 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i8> %3, ptr %4, align 1
  ret void
}
; Negative test (i16 -> i8): the rounding add %1 feeds both the lshr and a
; second add (%1 + %1) that is stored to %dst2, so the add must stay as its
; own instruction; ADD #32 followed by LSR #6 is expected instead of RSHRNB.
define void @neg_add_has_two_uses(ptr %ptr, ptr %dst, ptr %dst2, i64 %index){
; CHECK-LABEL: neg_add_has_two_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    add z0.h, z0.h, #32 // =0x20
; CHECK-NEXT:    add z1.h, z0.h, z0.h
; CHECK-NEXT:    lsr z0.h, z0.h, #6
; CHECK-NEXT:    st1h { z1.h }, p0, [x2, x3, lsl #1]
; CHECK-NEXT:    st1b { z0.h }, p0, [x1, x3]
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x i16>, ptr %ptr, align 2
  %1 = add <vscale x 8 x i16> %load, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 32, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %2 = lshr <vscale x 8 x i16> %1, trunc (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 6, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) to <vscale x 8 x i16>)
  %3 = add <vscale x 8 x i16> %1, %1
  %4 = getelementptr inbounds i16, ptr %dst2, i64 %index
  %5 = trunc <vscale x 8 x i16> %2 to <vscale x 8 x i8>
  %6 = getelementptr inbounds i8, ptr %dst, i64 %index
  store <vscale x 8 x i16> %3, ptr %4, align 1
  store <vscale x 8 x i8> %5, ptr %6, align 1
  ret void
}
; Positive test (i64 -> i32): the add constant 32 equals 1 << (6 - 1), so the
; add + lshr + trunc sequence is expected to be selected as RSHRNB #6 on
; .d source elements producing .s results.
define void @add_lshr_rshrnb_s(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: add_lshr_rshrnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    rshrnb z0.s, z0.d, #6
; CHECK-NEXT:    st1w { z0.d }, p0, [x1, x2, lsl #2]
; CHECK-NEXT:    ret
  %load = load <vscale x 2 x i64>, ptr %ptr, align 2
  %1 = add <vscale x 2 x i64> %load, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 32, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %2 = lshr <vscale x 2 x i64> %1, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %3 = trunc <vscale x 2 x i64> %2 to <vscale x 2 x i32>
  %4 = getelementptr inbounds i32, ptr %dst, i64 %index
  store <vscale x 2 x i32> %3, ptr %4, align 1
  ret void
}
; Negative test (i64 -> i16): the add and shift constants match the rounding
; pattern (32 == 1 << (6 - 1)), but the truncation goes from i64 to i16 rather
; than to the half-width i32, so RSHRNB is not expected; ADD #32 followed by
; LSR #6 and a truncating ST1H from .d elements is expected instead.
define void @neg_add_lshr_rshrnb_s(ptr %ptr, ptr %dst, i64 %index){
; CHECK-LABEL: neg_add_lshr_rshrnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    add z0.d, z0.d, #32 // =0x20
; CHECK-NEXT:    lsr z0.d, z0.d, #6
; CHECK-NEXT:    st1h { z0.d }, p0, [x1, x2, lsl #1]
; CHECK-NEXT:    ret
  %load = load <vscale x 2 x i64>, ptr %ptr, align 2
  %1 = add <vscale x 2 x i64> %load, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 32, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %2 = lshr <vscale x 2 x i64> %1, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 6, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
  %3 = trunc <vscale x 2 x i64> %2 to <vscale x 2 x i16>
  %4 = getelementptr inbounds i16, ptr %dst, i64 %index
  store <vscale x 2 x i16> %3, ptr %4, align 1
  ret void
}