1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -codegenprepare -S | FileCheck %s
4 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5 target triple = "aarch64-unknown"
; %za (zext of %a) is defined ahead of the conditional branch and is consumed
; by the add and the sub that follow it. The expected output has no zext ahead
; of the branch; instead a separate zext of %a is placed directly before each
; of the two users.
7 define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
8 ; CHECK-LABEL: @sink_zext(
10 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
12 ; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
13 ; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
14 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
15 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
17 ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
18 ; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
19 ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
20 ; CHECK-NEXT: ret <8 x i16> [[RES_2]]
23 %za = zext <8 x i8> %a to <8 x i16>
24 br i1 %c, label %if.then, label %if.else
27 %zb.1 = zext <8 x i8> %b to <8 x i16>
28 %res.1 = add <8 x i16> %za, %zb.1
32 %zb.2 = zext <8 x i8> %b to <8 x i16>
33 %res.2 = sub <8 x i16> %za, %zb.2
; Signed counterpart of @sink_zext: %za (sext of %a) is defined ahead of the
; conditional branch and is consumed by the add and the sub that follow it.
; The expected output places a separate sext of %a directly before each user
; and leaves no extension ahead of the branch.
37 define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
38 ; CHECK-LABEL: @sink_sext(
40 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
42 ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
43 ; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
44 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
45 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
47 ; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
48 ; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
49 ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
50 ; CHECK-NEXT: ret <8 x i16> [[RES_2]]
53 %za = sext <8 x i8> %a to <8 x i16>
54 br i1 %c, label %if.then, label %if.else
57 %zb.1 = sext <8 x i8> %b to <8 x i16>
58 %res.1 = add <8 x i16> %za, %zb.1
62 %zb.2 = sext <8 x i8> %b to <8 x i16>
63 %res.2 = sub <8 x i16> %za, %zb.2
; %za is defined ahead of the conditional branch and its only visible user is
; the add; the other side of the branch returns the extension of %b directly.
; The expected output lists the extension of %a immediately before the add.
; NOTE(review): the function name says "zext", but every extension in this body
; is a sext and the body matches @do_not_sink_nonfree_sext line for line.
; Confirm whether %za was meant to be a zext; if the input is changed, the
; assertions must be regenerated with utils/update_test_checks.py.
67 define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
68 ; CHECK-LABEL: @do_not_sink_nonfree_zext(
70 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
72 ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
73 ; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
74 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
75 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
77 ; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
78 ; CHECK-NEXT: ret <8 x i16> [[ZB_2]]
81 %za = sext <8 x i8> %a to <8 x i16>
82 br i1 %c, label %if.then, label %if.else
85 %zb.1 = sext <8 x i8> %b to <8 x i16>
86 %res.1 = add <8 x i16> %za, %zb.1
90 %zb.2 = sext <8 x i8> %b to <8 x i16>
; %za (sext of %a) is defined ahead of the conditional branch and its only
; visible user is the add; the other side of the branch returns the sext of %b
; directly.
; NOTE(review): despite the "do_not_sink" name, the autogenerated assertions
; list the sext of %a after the branch, immediately before the add. Confirm
; that the name still describes the intended behaviour.
94 define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
95 ; CHECK-LABEL: @do_not_sink_nonfree_sext(
97 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
99 ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
100 ; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
101 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
102 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
104 ; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
105 ; CHECK-NEXT: ret <8 x i16> [[ZB_2]]
108 %za = sext <8 x i8> %a to <8 x i16>
109 br i1 %c, label %if.then, label %if.else
112 %zb.1 = sext <8 x i8> %b to <8 x i16>
113 %res.1 = add <8 x i16> %za, %zb.1
117 %zb.2 = sext <8 x i8> %b to <8 x i16>
121 ; The masks used are suitable for umull, sink shufflevector to users.
; %s1 and %s3 select lanes 0-7 and lanes 8-15 of %a ahead of the branch. Each
; one feeds a single umull call after the branch, paired with a shufflevector
; of %b that uses the same mask. The expected output has no shufflevector ahead
; of the branch and a copy of each %a shuffle directly before its umull call.
122 define <8 x i16> @sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
123 ; CHECK-LABEL: @sink_shufflevector_umull(
125 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
127 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
128 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
129 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]])
130 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
132 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
133 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
134 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]])
135 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
138 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
139 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
140 br i1 %c, label %if.then, label %if.else
143 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
144 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
145 ret <8 x i16> %vmull0
148 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
149 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
150 ret <8 x i16> %vmull1
153 ; Both exts and their shufflevector operands can be sunk.
; Ahead of the branch, %s1/%z1 (lanes 0-7 of %a, zero-extended) and %s3/%z3
; (lanes 8-15 of %a, sign-extended) are computed. %z1 feeds the add and %z3
; feeds the sub. The expected output has nothing but the branch ahead of the
; users, with each shufflevector + extension pair placed directly before the
; add or sub that consumes it.
154 define <8 x i16> @sink_shufflevector_ext_subadd(<16 x i8> %a, <16 x i8> %b, i1 %c) {
155 ; CHECK-LABEL: @sink_shufflevector_ext_subadd(
157 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
159 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
160 ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
161 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
162 ; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
163 ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]]
164 ; CHECK-NEXT: ret <8 x i16> [[RES1]]
166 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
167 ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
168 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
169 ; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
170 ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]]
171 ; CHECK-NEXT: ret <8 x i16> [[RES2]]
174 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
175 %z1 = zext <8 x i8> %s1 to <8 x i16>
176 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
177 %z3 = sext <8 x i8> %s3 to <8 x i16>
178 br i1 %c, label %if.then, label %if.else
181 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
182 %z2 = zext <8 x i8> %s2 to <8 x i16>
183 %res1 = add <8 x i16> %z1, %z2
187 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
188 %z4 = sext <8 x i8> %s4 to <8 x i16>
189 %res2 = sub <8 x i16> %z3, %z4
; External function taking an <8 x i16>; called from
; @sink_shufflevector_ext_subadd_multiuse to give %z3 a user ahead of the
; branch.
194 declare void @user1(<8 x i16>)
196 ; %s1/%z1 are sunk to the add. %z3 is also passed to @user1 ahead of the branch, so %s3/%z3 stay there and a copy is placed next to the sub.
; Same input as @sink_shufflevector_ext_subadd except for the extra call to
; @user1 with %z3 before the branch. The expected output keeps the lanes 8-15
; shufflevector, its sext and the call ahead of the branch, shows no lanes 0-7
; shufflevector or zext there, and places a shufflevector + extension pair
; directly before both the add and the sub.
197 define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b, i1 %c) {
198 ; CHECK-LABEL: @sink_shufflevector_ext_subadd_multiuse(
200 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
201 ; CHECK-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
202 ; CHECK-NEXT: call void @user1(<8 x i16> [[Z3]])
203 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
205 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
206 ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
207 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
208 ; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
209 ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]]
210 ; CHECK-NEXT: ret <8 x i16> [[RES1]]
212 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
213 ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
214 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
215 ; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
216 ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]]
217 ; CHECK-NEXT: ret <8 x i16> [[RES2]]
220 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
221 %z1 = zext <8 x i8> %s1 to <8 x i16>
222 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
223 %z3 = sext <8 x i8> %s3 to <8 x i16>
224 call void @user1(<8 x i16> %z3)
225 br i1 %c, label %if.then, label %if.else
228 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
229 %z2 = zext <8 x i8> %s2 to <8 x i16>
230 %res1 = add <8 x i16> %z1, %z2
234 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
235 %z4 = sext <8 x i8> %s4 to <8 x i16>
236 %res2 = sub <8 x i16> %z3, %z4
241 ; The masks used are not suitable for umull, do not sink.
; Two masks are irregular: %s1 uses index 1 where a contiguous lanes 0-7 mask
; would have index 4, and %s4 repeats index 10 where a contiguous lanes 8-15
; mask would have index 11. %s3 itself has a contiguous lanes 8-15 mask, but it
; is paired with %s4 in its umull call. The expected output leaves both %s1
; and %s3 ahead of the branch, unchanged.
242 define <8 x i16> @no_sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
243 ; CHECK-LABEL: @no_sink_shufflevector_umull(
245 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
246 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
247 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
249 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
250 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S1]], <8 x i8> [[S2]])
251 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
253 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
254 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S3]], <8 x i8> [[S4]])
255 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
258 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
259 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
260 br i1 %c, label %if.then, label %if.else
263 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
264 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
265 ret <8 x i16> %vmull0
268 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
269 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
270 ret <8 x i16> %vmull1
; AArch64 NEON umull intrinsic called by the shufflevector/umull tests in this
; file: two <8 x i8> operands, <8 x i16> result.
274 ; Function Attrs: nounwind readnone
275 declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) #2