1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -codegenprepare -S | FileCheck --check-prefixes=CHECK,NOFP16 %s
3 ; RUN: opt < %s -codegenprepare -S -mattr=+fullfp16 | FileCheck --check-prefixes=CHECK,FULLFP16 %s
5 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
6 target triple = "aarch64-unknown"
; The zext of %a is defined ahead of the conditional branch, but its only users
; are the add and the sub that come after it. The CHECK lines expect a separate
; copy of that zext beside each user ([[TMP0]] and [[TMP1]]).
8 define <8 x i16> @sink_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
9 ; CHECK-LABEL: @sink_zext(
11 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
13 ; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
14 ; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
15 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
16 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
18 ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16>
19 ; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
20 ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
21 ; CHECK-NEXT: ret <8 x i16> [[RES_2]]
24 %za = zext <8 x i8> %a to <8 x i16>
25 br i1 %c, label %if.then, label %if.else
28 %zb.1 = zext <8 x i8> %b to <8 x i16>
29 %res.1 = add <8 x i16> %za, %zb.1
33 %zb.2 = zext <8 x i8> %b to <8 x i16>
34 %res.2 = sub <8 x i16> %za, %zb.2
; Sign-extend version of the test above: the sext of %a is defined ahead of the
; conditional branch and is used only by the add and the sub after it. The
; CHECK lines expect one copy of that sext beside each user ([[TMP0]], [[TMP1]]).
38 define <8 x i16> @sink_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
39 ; CHECK-LABEL: @sink_sext(
41 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
43 ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
44 ; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
45 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
46 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
48 ; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16>
49 ; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
50 ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]]
51 ; CHECK-NEXT: ret <8 x i16> [[RES_2]]
54 %za = sext <8 x i8> %a to <8 x i16>
55 br i1 %c, label %if.then, label %if.else
58 %zb.1 = sext <8 x i8> %b to <8 x i16>
59 %res.1 = add <8 x i16> %za, %zb.1
63 %zb.2 = sext <8 x i8> %b to <8 x i16>
64 %res.2 = sub <8 x i16> %za, %zb.2
; zext counterpart of @do_not_sink_nonfree_sext: same control flow, with every
; extend being a zext so that the function body matches its name (it previously
; used sext throughout and duplicated the sext test).
; NOTE: these assertions were edited by hand from sext to zext; re-run
; utils/update_test_checks.py to confirm them.
68 define <8 x i16> @do_not_sink_nonfree_zext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
69 ; CHECK-LABEL: @do_not_sink_nonfree_zext(
71 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
73 ; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16>
74 ; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16>
75 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
76 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
78 ; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16>
79 ; CHECK-NEXT: ret <8 x i16> [[ZB_2]]
82 %za = zext <8 x i8> %a to <8 x i16>
83 br i1 %c, label %if.then, label %if.else
86 %zb.1 = zext <8 x i8> %b to <8 x i16>
87 %res.1 = add <8 x i16> %za, %zb.1
91 %zb.2 = zext <8 x i8> %b to <8 x i16>
; Sign-extend case where only one path combines the extends: %za is used only
; by the add, and the CHECK lines expect it ([[TMP0]]) beside that add, after
; the branch. The other path returns a sext of %b directly ([[ZB_2]]).
; NOTE(review): the name says "do not sink" although the checks place the sext
; of %a after the branch - confirm which extend the name refers to.
95 define <8 x i16> @do_not_sink_nonfree_sext(<8 x i8> %a, <8 x i8> %b, i1 %c) {
96 ; CHECK-LABEL: @do_not_sink_nonfree_sext(
98 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
100 ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16>
101 ; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16>
102 ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]]
103 ; CHECK-NEXT: ret <8 x i16> [[RES_1]]
105 ; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16>
106 ; CHECK-NEXT: ret <8 x i16> [[ZB_2]]
109 %za = sext <8 x i8> %a to <8 x i16>
110 br i1 %c, label %if.then, label %if.else
113 %zb.1 = sext <8 x i8> %b to <8 x i16>
114 %res.1 = add <8 x i16> %za, %zb.1
118 %zb.2 = sext <8 x i8> %b to <8 x i16>
122 ; The masks used are suitable for umull, sink shufflevector to users.
123 define <8 x i16> @sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
124 ; CHECK-LABEL: @sink_shufflevector_umull(
126 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
128 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
129 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
130 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]])
131 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
133 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
134 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
135 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]])
136 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
139 %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
140 %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
141 br i1 %c, label %if.then, label %if.else
144 %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
145 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
146 ret <8 x i16> %vmull0
149 %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
150 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
151 ret <8 x i16> %vmull1
154 ; The masks used are suitable for smull, sink shufflevector to users.
155 define <8 x i16> @sink_shufflevector_smull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
156 ; CHECK-LABEL: @sink_shufflevector_smull(
158 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
160 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
161 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
162 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]])
163 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
165 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
166 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
167 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]])
168 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
171 %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
172 %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
173 br i1 %c, label %if.then, label %if.else
176 %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
177 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
178 ret <8 x i16> %vmull0
181 %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
182 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
183 ret <8 x i16> %vmull1
186 ; Both exts and their shufflevector operands can be sunk.
187 define <8 x i16> @sink_shufflevector_ext_subadd(<16 x i8> %a, <16 x i8> %b, i1 %c) {
188 ; CHECK-LABEL: @sink_shufflevector_ext_subadd(
190 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
192 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
193 ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
194 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
195 ; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
196 ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]]
197 ; CHECK-NEXT: ret <8 x i16> [[RES1]]
199 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
200 ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
201 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
202 ; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
203 ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]]
204 ; CHECK-NEXT: ret <8 x i16> [[RES2]]
207 %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
208 %z1 = zext <8 x i8> %s1 to <8 x i16>
209 %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
210 %z3 = sext <8 x i8> %s3 to <8 x i16>
211 br i1 %c, label %if.then, label %if.else
214 %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
215 %z2 = zext <8 x i8> %s2 to <8 x i16>
216 %res1 = add <8 x i16> %z1, %z2
220 %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
221 %z4 = sext <8 x i8> %s4 to <8 x i16>
222 %res2 = sub <8 x i16> %z3, %z4
227 declare void @user1(<8 x i16>)
229 ; Both exts and their shufflevector operands can be sunk; %s3/%z3 are also kept ahead of the branch because %z3 has a second user there, the call to @user1.
230 define <8 x i16> @sink_shufflevector_ext_subadd_multiuse(<16 x i8> %a, <16 x i8> %b, i1 %c) {
231 ; CHECK-LABEL: @sink_shufflevector_ext_subadd_multiuse(
233 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
234 ; CHECK-NEXT: [[Z3:%.*]] = sext <8 x i8> [[S3]] to <8 x i16>
235 ; CHECK-NEXT: call void @user1(<8 x i16> [[Z3]])
236 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
238 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
239 ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
240 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
241 ; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16>
242 ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]]
243 ; CHECK-NEXT: ret <8 x i16> [[RES1]]
245 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
246 ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16>
247 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
248 ; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16>
249 ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]]
250 ; CHECK-NEXT: ret <8 x i16> [[RES2]]
253 %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
254 %z1 = zext <8 x i8> %s1 to <8 x i16>
255 %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
256 %z3 = sext <8 x i8> %s3 to <8 x i16>
257 call void @user1(<8 x i16> %z3)
258 br i1 %c, label %if.then, label %if.else
261 %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
262 %z2 = zext <8 x i8> %s2 to <8 x i16>
263 %res1 = add <8 x i16> %z1, %z2
267 %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
268 %z4 = sext <8 x i8> %s4 to <8 x i16>
269 %res2 = sub <8 x i16> %z3, %z4
274 ; The masks used are not suitable for umull, do not sink.
275 define <8 x i16> @no_sink_shufflevector_umull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
276 ; CHECK-LABEL: @no_sink_shufflevector_umull(
278 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
279 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
280 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
282 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
283 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S1]], <8 x i8> [[S2]])
284 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
286 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
287 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[S3]], <8 x i8> [[S4]])
288 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
291 %s1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7>
292 %s3 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
293 br i1 %c, label %if.then, label %if.else
296 %s2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
297 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s1, <8 x i8> %s2) #3
298 ret <8 x i16> %vmull0
301 %s4 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 10, i32 12, i32 13, i32 14, i32 15>
302 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %s3, <8 x i8> %s4) #3
303 ret <8 x i16> %vmull1
307 declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>)
308 declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>)
310 ; The insertelement must be placed before the shufflevector that uses it; otherwise a 'does not dominate all uses' error will occur.
311 define <4 x i32> @sink_insertelement(i16 %e, i8 %f) {
312 ; CHECK-LABEL: @sink_insertelement(
313 ; CHECK-NEXT: for.cond4.preheader.lr.ph:
314 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[F:%.*]], 0
315 ; CHECK-NEXT: [[CONV25:%.*]] = sext i16 [[E:%.*]] to i32
316 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT143:%.*]] = insertelement <4 x i32> poison, i32 [[CONV25]], i32 0
317 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND4_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND4_PREHEADER_PREHEADER:%.*]]
318 ; CHECK: for.cond4.preheader.us.preheader:
319 ; CHECK-NEXT: [[BROADCAST_SPLAT144:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT143]], <4 x i32> poison, <4 x i32> zeroinitializer
320 ; CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i32> zeroinitializer, [[BROADCAST_SPLAT144]]
321 ; CHECK-NEXT: ret <4 x i32> [[TMP0]]
322 ; CHECK: for.cond4.preheader.preheader:
323 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
325 for.cond4.preheader.lr.ph:
326 %cmp = icmp slt i8 %f, 0
327 %conv25 = sext i16 %e to i32
328 %broadcast.splatinsert143 = insertelement <4 x i32> poison, i32 %conv25, i32 0
329 br i1 %cmp, label %for.cond4.preheader.us.preheader, label %for.cond4.preheader.preheader
331 for.cond4.preheader.us.preheader: ; preds = %for.cond4.preheader.lr.ph
332 %broadcast.splat144 = shufflevector <4 x i32> %broadcast.splatinsert143, <4 x i32> poison, <4 x i32> zeroinitializer
333 %0 = mul <4 x i32> zeroinitializer, %broadcast.splat144
336 for.cond4.preheader.preheader: ; preds = %for.cond4.preheader.lr.ph
337 ret <4 x i32> zeroinitializer
; Both shufflevectors extract elements 4-7 of their source and are defined in
; the first block, while their only users are the sexts feeding the add in
; for.cond4.preheader.us.preheader. The CHECK lines expect both shuffles to be
; placed in that block, ahead of the sexts.
340 define <4 x i32> @sinkadd_partial(<8 x i16> %a1, <8 x i16> %a2, i8 %f) {
341 ; CHECK-LABEL: @sinkadd_partial(
342 ; CHECK-NEXT: for.cond4.preheader.lr.ph:
343 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[F:%.*]], 0
344 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND4_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND4_PREHEADER_PREHEADER:%.*]]
345 ; CHECK: for.cond4.preheader.us.preheader:
346 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[A1:%.*]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
347 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A2:%.*]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
348 ; CHECK-NEXT: [[E1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
349 ; CHECK-NEXT: [[E2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
350 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[E1]], [[E2]]
351 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
352 ; CHECK: for.cond4.preheader.preheader:
353 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
355 for.cond4.preheader.lr.ph:
356 %cmp = icmp slt i8 %f, 0
357 %s2 = shufflevector <8 x i16> %a2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
358 %s1 = shufflevector <8 x i16> %a1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
359 br i1 %cmp, label %for.cond4.preheader.us.preheader, label %for.cond4.preheader.preheader
361 for.cond4.preheader.us.preheader: ; preds = %for.cond4.preheader.lr.ph
362 %e1 = sext <4 x i16> %s1 to <4 x i32>
363 %e2 = sext <4 x i16> %s2 to <4 x i32>
364 %0 = add <4 x i32> %e1, %e2
367 for.cond4.preheader.preheader: ; preds = %for.cond4.preheader.lr.ph
368 ret <4 x i32> zeroinitializer
; Same as @sinkadd_partial, but the two sexts appear in the opposite order
; (%e2 before %e1). The CHECK lines expect both shuffles to be placed ahead of
; the sexts in for.cond4.preheader.us.preheader, with the sexts keeping their
; %e2-then-%e1 order.
371 define <4 x i32> @sinkadd_partial_rev(<8 x i16> %a1, <8 x i16> %a2, i8 %f) {
372 ; CHECK-LABEL: @sinkadd_partial_rev(
373 ; CHECK-NEXT: for.cond4.preheader.lr.ph:
374 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[F:%.*]], 0
375 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND4_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND4_PREHEADER_PREHEADER:%.*]]
376 ; CHECK: for.cond4.preheader.us.preheader:
377 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[A1:%.*]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
378 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A2:%.*]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
379 ; CHECK-NEXT: [[E2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
380 ; CHECK-NEXT: [[E1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
381 ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[E1]], [[E2]]
382 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
383 ; CHECK: for.cond4.preheader.preheader:
384 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
386 for.cond4.preheader.lr.ph:
387 %cmp = icmp slt i8 %f, 0
388 %s2 = shufflevector <8 x i16> %a2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
389 %s1 = shufflevector <8 x i16> %a1, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
390 br i1 %cmp, label %for.cond4.preheader.us.preheader, label %for.cond4.preheader.preheader
392 for.cond4.preheader.us.preheader: ; preds = %for.cond4.preheader.lr.ph
393 %e2 = sext <4 x i16> %s2 to <4 x i32>
394 %e1 = sext <4 x i16> %s1 to <4 x i32>
395 %0 = add <4 x i32> %e1, %e2
398 for.cond4.preheader.preheader: ; preds = %for.cond4.preheader.lr.ph
399 ret <4 x i32> zeroinitializer
402 declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
; The masks select the low half (0-7) and the high half (8-15) of %a and %b.
; The two %a shuffles are defined ahead of the branch; the CHECK lines expect
; each of them ([[TMP0]], [[TMP1]]) beside the pmull call that uses it.
404 define <8 x i16> @sink_shufflevector_pmull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
405 ; CHECK-LABEL: @sink_shufflevector_pmull(
407 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
409 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
410 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
411 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]])
412 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
414 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
415 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
416 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]])
417 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
420 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
421 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
422 br i1 %c, label %if.then, label %if.else
425 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
426 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
427 ret <8 x i16> %vmull0
430 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
431 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
432 ret <8 x i16> %vmull1
435 ; Indexed pmull is not available on aarch64. Shuffle vector should not be sunk here.
436 define <8 x i16> @no_sink_splatvector_pmull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
437 ; CHECK-LABEL: @no_sink_splatvector_pmull(
439 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
440 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
441 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
443 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
444 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[S1]], <8 x i8> [[S2]])
445 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
447 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
448 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[S3]], <8 x i8> [[S4]])
449 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
452 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
453 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
454 br i1 %c, label %if.then, label %if.else
457 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
458 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
459 ret <8 x i16> %vmull0
462 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
463 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
464 ret <8 x i16> %vmull1
467 ; Masks used are not suitable for pmull. Shuffle vector should not be sunk here.
468 define <8 x i16> @no_sink_shufflevector_pmull(<16 x i8> %a, <16 x i8> %b, i1 %c) {
469 ; CHECK-LABEL: @no_sink_shufflevector_pmull(
471 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
472 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
473 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
475 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
476 ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[S1]], <8 x i8> [[S2]])
477 ; CHECK-NEXT: ret <8 x i16> [[VMULL0]]
479 ; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
480 ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[S3]], <8 x i8> [[S4]])
481 ; CHECK-NEXT: ret <8 x i16> [[VMULL1]]
484 %s1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
485 %s3 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
486 br i1 %c, label %if.then, label %if.else
489 %s2 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
490 %vmull0 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s1, <8 x i8> %s2)
491 ret <8 x i16> %vmull0
494 %s4 = shufflevector <16 x i8> %b, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
495 %vmull1 = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %s3, <8 x i8> %s4)
496 ret <8 x i16> %vmull1
499 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
; Eight single-lane splats of %a (lanes 0-7) are defined ahead of the branch and
; each one feeds exactly one llvm.fma.v8f16 call. The expected output depends on
; the RUN line:
;  - NOFP16 (no -mattr): all eight splats stay ahead of the branch.
;  - FULLFP16 (-mattr=+fullfp16): each splat is placed immediately before the
;    fma call that uses it.
501 define <8 x half> @sink_shufflevector_fma_v8f16(i1 %c, <8 x half> %a, <8 x half> %b) {
502 ; NOFP16-LABEL: @sink_shufflevector_fma_v8f16(
503 ; NOFP16-NEXT: entry:
504 ; NOFP16-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
505 ; NOFP16-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
506 ; NOFP16-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
507 ; NOFP16-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
508 ; NOFP16-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
509 ; NOFP16-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
510 ; NOFP16-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
511 ; NOFP16-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
512 ; NOFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
514 ; NOFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
515 ; NOFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
516 ; NOFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
517 ; NOFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
518 ; NOFP16-NEXT: ret <8 x half> [[R_3]]
520 ; NOFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
521 ; NOFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
522 ; NOFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
523 ; NOFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
524 ; NOFP16-NEXT: ret <8 x half> [[R_7]]
526 ; FULLFP16-LABEL: @sink_shufflevector_fma_v8f16(
527 ; FULLFP16-NEXT: entry:
528 ; FULLFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
530 ; FULLFP16-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
531 ; FULLFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[B]])
532 ; FULLFP16-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
533 ; FULLFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_0]], <8 x half> [[TMP1]], <8 x half> [[B]])
534 ; FULLFP16-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
535 ; FULLFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_1]], <8 x half> [[TMP2]], <8 x half> [[B]])
536 ; FULLFP16-NEXT: [[TMP3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
537 ; FULLFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_2]], <8 x half> [[TMP3]], <8 x half> [[B]])
538 ; FULLFP16-NEXT: ret <8 x half> [[R_3]]
540 ; FULLFP16-NEXT: [[TMP4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
541 ; FULLFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[TMP4]], <8 x half> [[B]])
542 ; FULLFP16-NEXT: [[TMP5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
543 ; FULLFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_4]], <8 x half> [[TMP5]], <8 x half> [[B]])
544 ; FULLFP16-NEXT: [[TMP6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
545 ; FULLFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_5]], <8 x half> [[TMP6]], <8 x half> [[B]])
546 ; FULLFP16-NEXT: [[TMP7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
547 ; FULLFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> [[R_6]], <8 x half> [[TMP7]], <8 x half> [[B]])
548 ; FULLFP16-NEXT: ret <8 x half> [[R_7]]
551 %s0 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer
552 %s1 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
553 %s2 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
554 %s3 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
555 %s4 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
556 %s5 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
557 %s6 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
558 %s7 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
559 br i1 %c, label %if.then, label %if.else
562 %r.0 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %s0, <8 x half> %b)
563 %r.1 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.0, <8 x half> %s1, <8 x half> %b)
564 %r.2 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.1, <8 x half> %s2, <8 x half> %b)
565 %r.3 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.2, <8 x half> %s3, <8 x half> %b)
569 %r.4 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %s4, <8 x half> %b)
570 %r.5 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.4, <8 x half> %s5, <8 x half> %b)
571 %r.6 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.5, <8 x half> %s6, <8 x half> %b)
572 %r.7 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %r.6, <8 x half> %s7, <8 x half> %b)
576 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
; Splats of lanes 0-3 of %a are defined ahead of the branch and consumed as the
; second operand of llvm.fma.v4f32 calls after it. The CHECK lines expect
; -codegenprepare to place a copy of each splat directly in front of the fma
; call that uses it.
578 define <4 x float> @sink_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
579 ; CHECK-LABEL: @sink_shufflevector_fma_v4f32(
581 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
583 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
584 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
585 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
586 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_0]], <4 x float> [[TMP1]], <4 x float> [[B]])
587 ; CHECK-NEXT: ret <4 x float> [[R_1]]
589 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
590 ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B]], <4 x float> [[TMP2]], <4 x float> [[B]])
591 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
592 ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[R_2]], <4 x float> [[TMP3]], <4 x float> [[B]])
593 ; CHECK-NEXT: ret <4 x float> [[R_3]]
596 %s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer
597 %s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
598 %s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
599 %s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
600 br i1 %c, label %if.then, label %if.else
603 %r.0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s0, <4 x float> %b)
604 %r.1 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %r.0, <4 x float> %s1, <4 x float> %b)
608 %r.2 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s2, <4 x float> %b)
609 %r.3 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %r.2, <4 x float> %s3, <4 x float> %b)
; Same shape as the v4f32 fma test, except that each lane splat of %a is passed
; as the FIRST operand of llvm.fma.v4f32. The CHECK lines expect a copy of each
; splat directly in front of the fma call that uses it.
; NOTE(review): the function name ends in `v4f3`, presumably a truncated
; `v4f32`; renaming would also require updating the CHECK-LABEL line.
613 define <4 x float> @sink_shufflevector_first_arg_fma_v4f3(i1 %c, <8 x float> %a, <4 x float> %b) {
614 ; CHECK-LABEL: @sink_shufflevector_first_arg_fma_v4f3(
616 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
618 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
619 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP0]], <4 x float> [[B:%.*]], <4 x float> [[B]])
620 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
621 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[R_0]], <4 x float> [[B]])
622 ; CHECK-NEXT: ret <4 x float> [[R_1]]
624 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
625 ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP2]], <4 x float> [[B]], <4 x float> [[B]])
626 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
627 ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP3]], <4 x float> [[R_2]], <4 x float> [[B]])
628 ; CHECK-NEXT: ret <4 x float> [[R_3]]
631 %s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer
632 %s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
633 %s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
634 %s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
635 br i1 %c, label %if.then, label %if.else
638 %r.0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s0, <4 x float> %b, <4 x float> %b)
639 %r.1 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s1, <4 x float> %r.0, <4 x float> %b)
643 %r.2 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s2, <4 x float> %b, <4 x float> %b)
644 %r.3 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s3, <4 x float> %r.2, <4 x float> %b)
650 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
; <2 x double> variant: the lane-0 and lane-1 splats of %a are defined ahead of
; the branch and each feeds the second operand of a single llvm.fma.v2f64 call.
; The CHECK lines expect each splat directly in front of its fma user, after
; the branch.
652 define <2 x double> @sink_shufflevector_fma_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) {
653 ; CHECK-LABEL: @sink_shufflevector_fma_v2f64(
655 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
657 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
658 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B:%.*]], <2 x double> [[TMP0]], <2 x double> [[B]])
659 ; CHECK-NEXT: ret <2 x double> [[R_0]]
661 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
662 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[B]], <2 x double> [[TMP1]], <2 x double> [[B]])
663 ; CHECK-NEXT: ret <2 x double> [[R_1]]
666 %s0 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer
667 %s1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 1>
668 br i1 %c, label %if.then, label %if.else
671 %r.0 = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %s0, <2 x double> %b)
672 ret <2 x double> %r.0
675 %r.1 = tail call fast <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %s1, <2 x double> %b)
676 ret <2 x double> %r.1
; A 4-element splat of lane 4 of the <8 x float> %a is defined ahead of the
; branch and used as the second operand of one llvm.fma.v4f32 call.
; NOTE(review): the name says "do_not_sink", yet the autogenerated CHECK lines
; place the shufflevector after the `br`, directly in front of the fma call.
; Confirm whether the name or the recorded expectation is the stale one.
679 define <4 x float> @do_not_sink_out_of_range_shufflevector_fma_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
680 ; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fma_v4f32(
682 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
684 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
685 ; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
686 ; CHECK-NEXT: ret <4 x float> [[R]]
688 ; CHECK-NEXT: ret <4 x float> zeroinitializer
691 %s4 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
692 br i1 %c, label %if.then, label %if.else
695 %r = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %s4, <4 x float> %b)
699 ret <4 x float> zeroinitializer
702 declare <5 x float> @llvm.fma.v5f32(<5 x float>, <5 x float>, <5 x float>)
; <5 x float> variant mixing uniform and non-uniform masks. %s0 (all lane 0)
; and %s4 (all lane 4) select a single lane; %s1-%s3 end in lane 4 and so do
; not. The CHECK lines expect S1-S3 to stay ahead of the branch, while copies
; of the two single-lane shuffles appear directly in front of their
; llvm.fma.v5f32 users.
704 define <5 x float> @sink_shufflevector_fma_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) {
705 ; CHECK-LABEL: @sink_shufflevector_fma_v5f32(
707 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
708 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
709 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
710 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
712 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> zeroinitializer
713 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B:%.*]], <5 x float> [[TMP0]], <5 x float> [[B]])
714 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]])
715 ; CHECK-NEXT: ret <5 x float> [[R_1]]
717 ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]])
718 ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]])
719 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
720 ; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> [[R_3]], <5 x float> [[TMP1]], <5 x float> [[B]])
721 ; CHECK-NEXT: ret <5 x float> [[R_4]]
724 %s0 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> zeroinitializer
725 %s1 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
726 %s2 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
727 %s3 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
728 %s4 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
729 br i1 %c, label %if.then, label %if.else
732 %r.0 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %b, <5 x float> %s0, <5 x float> %b)
733 %r.1 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.0, <5 x float> %s1, <5 x float> %b)
737 %r.2 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %b, <5 x float> %s2, <5 x float> %b)
738 %r.3 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.2, <5 x float> %s3, <5 x float> %b)
739 %r.4 = tail call fast <5 x float> @llvm.fma.v5f32(<5 x float> %r.3, <5 x float> %s4, <5 x float> %b)
743 declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
; <8 x half> lane splats of %a (lanes 0-7) feed the second operand of
; llvm.fmuladd.v8f16 calls after the branch. The expectation depends on the
; RUN line:
;   NOFP16   (no extra -mattr): all eight splats stay ahead of the branch.
;   FULLFP16 (-mattr=+fullfp16): a copy of each splat appears directly in
;            front of the fmuladd call that uses it.
745 define <8 x half> @sink_shufflevector_fmuladd_v8f16(i1 %c, <8 x half> %a, <8 x half> %b) {
746 ; NOFP16-LABEL: @sink_shufflevector_fmuladd_v8f16(
747 ; NOFP16-NEXT: entry:
748 ; NOFP16-NEXT: [[S0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
749 ; NOFP16-NEXT: [[S1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
750 ; NOFP16-NEXT: [[S2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
751 ; NOFP16-NEXT: [[S3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
752 ; NOFP16-NEXT: [[S4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
753 ; NOFP16-NEXT: [[S5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
754 ; NOFP16-NEXT: [[S6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
755 ; NOFP16-NEXT: [[S7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
756 ; NOFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
758 ; NOFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B:%.*]], <8 x half> [[S0]], <8 x half> [[B]])
759 ; NOFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_0]], <8 x half> [[S1]], <8 x half> [[B]])
760 ; NOFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_1]], <8 x half> [[S2]], <8 x half> [[B]])
761 ; NOFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_2]], <8 x half> [[S3]], <8 x half> [[B]])
762 ; NOFP16-NEXT: ret <8 x half> [[R_3]]
764 ; NOFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B]], <8 x half> [[S4]], <8 x half> [[B]])
765 ; NOFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_4]], <8 x half> [[S5]], <8 x half> [[B]])
766 ; NOFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_5]], <8 x half> [[S6]], <8 x half> [[B]])
767 ; NOFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_6]], <8 x half> [[S7]], <8 x half> [[B]])
768 ; NOFP16-NEXT: ret <8 x half> [[R_7]]
770 ; FULLFP16-LABEL: @sink_shufflevector_fmuladd_v8f16(
771 ; FULLFP16-NEXT: entry:
772 ; FULLFP16-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
774 ; FULLFP16-NEXT: [[TMP0:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <8 x i32> zeroinitializer
775 ; FULLFP16-NEXT: [[R_0:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B:%.*]], <8 x half> [[TMP0]], <8 x half> [[B]])
776 ; FULLFP16-NEXT: [[TMP1:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
777 ; FULLFP16-NEXT: [[R_1:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_0]], <8 x half> [[TMP1]], <8 x half> [[B]])
778 ; FULLFP16-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
779 ; FULLFP16-NEXT: [[R_2:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_1]], <8 x half> [[TMP2]], <8 x half> [[B]])
780 ; FULLFP16-NEXT: [[TMP3:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
781 ; FULLFP16-NEXT: [[R_3:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_2]], <8 x half> [[TMP3]], <8 x half> [[B]])
782 ; FULLFP16-NEXT: ret <8 x half> [[R_3]]
784 ; FULLFP16-NEXT: [[TMP4:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
785 ; FULLFP16-NEXT: [[R_4:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[B]], <8 x half> [[TMP4]], <8 x half> [[B]])
786 ; FULLFP16-NEXT: [[TMP5:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
787 ; FULLFP16-NEXT: [[R_5:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_4]], <8 x half> [[TMP5]], <8 x half> [[B]])
788 ; FULLFP16-NEXT: [[TMP6:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
789 ; FULLFP16-NEXT: [[R_6:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_5]], <8 x half> [[TMP6]], <8 x half> [[B]])
790 ; FULLFP16-NEXT: [[TMP7:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
791 ; FULLFP16-NEXT: [[R_7:%.*]] = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> [[R_6]], <8 x half> [[TMP7]], <8 x half> [[B]])
792 ; FULLFP16-NEXT: ret <8 x half> [[R_7]]
795 %s0 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer
796 %s1 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
797 %s2 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
798 %s3 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
799 %s4 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
800 %s5 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
801 %s6 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
802 %s7 = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
803 br i1 %c, label %if.then, label %if.else
806 %r.0 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %b, <8 x half> %s0, <8 x half> %b)
807 %r.1 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %r.0, <8 x half> %s1, <8 x half> %b)
808 %r.2 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %r.1, <8 x half> %s2, <8 x half> %b)
809 %r.3 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %r.2, <8 x half> %s3, <8 x half> %b)
813 %r.4 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %b, <8 x half> %s4, <8 x half> %b)
814 %r.5 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %r.4, <8 x half> %s5, <8 x half> %b)
815 %r.6 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %r.5, <8 x half> %s6, <8 x half> %b)
816 %r.7 = tail call fast <8 x half> @llvm.fmuladd.v8f16(<8 x half> %r.6, <8 x half> %s7, <8 x half> %b)
820 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
; Splats of lanes 0-3 of %a are defined ahead of the branch and consumed as the
; second operand of llvm.fmuladd.v4f32 calls after it. The CHECK lines expect
; -codegenprepare to place a copy of each splat directly in front of the
; fmuladd call that uses it.
822 define <4 x float> @sink_shufflevector_fmuladd_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
823 ; CHECK-LABEL: @sink_shufflevector_fmuladd_v4f32(
825 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
827 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
828 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
829 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
830 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[R_0]], <4 x float> [[TMP1]], <4 x float> [[B]])
831 ; CHECK-NEXT: ret <4 x float> [[R_1]]
833 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
834 ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[B]], <4 x float> [[TMP2]], <4 x float> [[B]])
835 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
836 ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[R_2]], <4 x float> [[TMP3]], <4 x float> [[B]])
837 ; CHECK-NEXT: ret <4 x float> [[R_3]]
840 %s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer
841 %s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
842 %s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
843 %s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
844 br i1 %c, label %if.then, label %if.else
847 %r.0 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %b, <4 x float> %s0, <4 x float> %b)
848 %r.1 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %r.0, <4 x float> %s1, <4 x float> %b)
852 %r.2 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %b, <4 x float> %s2, <4 x float> %b)
853 %r.3 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %r.2, <4 x float> %s3, <4 x float> %b)
; Same shape as the v4f32 fmuladd test, except that each lane splat of %a is
; passed as the FIRST operand of llvm.fmuladd.v4f32. The CHECK lines expect a
; copy of each splat directly in front of the fmuladd call that uses it.
; NOTE(review): the function name ends in `v4f3`, presumably a truncated
; `v4f32`; renaming would also require updating the CHECK-LABEL line.
857 define <4 x float> @sink_shufflevector_first_arg_fmuladd_v4f3(i1 %c, <8 x float> %a, <4 x float> %b) {
858 ; CHECK-LABEL: @sink_shufflevector_first_arg_fmuladd_v4f3(
860 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
862 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> zeroinitializer
863 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP0]], <4 x float> [[B:%.*]], <4 x float> [[B]])
864 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
865 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP1]], <4 x float> [[R_0]], <4 x float> [[B]])
866 ; CHECK-NEXT: ret <4 x float> [[R_1]]
868 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
869 ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> [[B]], <4 x float> [[B]])
870 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
871 ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP3]], <4 x float> [[R_2]], <4 x float> [[B]])
872 ; CHECK-NEXT: ret <4 x float> [[R_3]]
875 %s0 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> zeroinitializer
876 %s1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
877 %s2 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
878 %s3 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
879 br i1 %c, label %if.then, label %if.else
882 %r.0 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %s0, <4 x float> %b, <4 x float> %b)
883 %r.1 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %s1, <4 x float> %r.0, <4 x float> %b)
887 %r.2 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %s2, <4 x float> %b, <4 x float> %b)
888 %r.3 = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %s3, <4 x float> %r.2, <4 x float> %b)
894 declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
; <2 x double> variant: the lane-0 and lane-1 splats of %a are defined ahead of
; the branch and each feeds the second operand of a single llvm.fmuladd.v2f64
; call. The CHECK lines expect each splat directly in front of its fmuladd
; user, after the branch.
896 define <2 x double> @sink_shufflevector_fmuladd_v2f64(i1 %c, <2 x double> %a, <2 x double> %b) {
897 ; CHECK-LABEL: @sink_shufflevector_fmuladd_v2f64(
899 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
901 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
902 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[B:%.*]], <2 x double> [[TMP0]], <2 x double> [[B]])
903 ; CHECK-NEXT: ret <2 x double> [[R_0]]
905 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
906 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[B]], <2 x double> [[TMP1]], <2 x double> [[B]])
907 ; CHECK-NEXT: ret <2 x double> [[R_1]]
910 %s0 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer
911 %s1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 1>
912 br i1 %c, label %if.then, label %if.else
915 %r.0 = tail call fast <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %s0, <2 x double> %b)
916 ret <2 x double> %r.0
919 %r.1 = tail call fast <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %s1, <2 x double> %b)
920 ret <2 x double> %r.1
; A 4-element splat of lane 4 of the <8 x float> %a is defined ahead of the
; branch and used as the second operand of one llvm.fmuladd.v4f32 call.
; NOTE(review): the name says "do_not_sink", yet the autogenerated CHECK lines
; place the shufflevector after the `br`, directly in front of the fmuladd
; call. Confirm whether the name or the recorded expectation is the stale one.
923 define <4 x float> @do_not_sink_out_of_range_shufflevector_fmuladd_v4f32(i1 %c, <8 x float> %a, <4 x float> %b) {
924 ; CHECK-LABEL: @do_not_sink_out_of_range_shufflevector_fmuladd_v4f32(
926 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
928 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
929 ; CHECK-NEXT: [[R:%.*]] = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[B:%.*]], <4 x float> [[TMP0]], <4 x float> [[B]])
930 ; CHECK-NEXT: ret <4 x float> [[R]]
932 ; CHECK-NEXT: ret <4 x float> zeroinitializer
935 %s4 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
936 br i1 %c, label %if.then, label %if.else
939 %r = tail call fast <4 x float> @llvm.fmuladd.v4f32(<4 x float> %b, <4 x float> %s4, <4 x float> %b)
943 ret <4 x float> zeroinitializer
946 declare <5 x float> @llvm.fmuladd.v5f32(<5 x float>, <5 x float>, <5 x float>)
; <5 x float> variant mixing uniform and non-uniform masks. %s0 (all lane 0)
; and %s4 (all lane 4) select a single lane; %s1-%s3 end in lane 4 and so do
; not. The CHECK lines expect S1-S3 to stay ahead of the branch, while copies
; of the two single-lane shuffles appear directly in front of their
; llvm.fmuladd.v5f32 users.
948 define <5 x float> @sink_shufflevector_fmuladd_v5f32(i1 %c, <8 x float> %a, <5 x float> %b) {
949 ; CHECK-LABEL: @sink_shufflevector_fmuladd_v5f32(
951 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
952 ; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
953 ; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
954 ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
956 ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> zeroinitializer
957 ; CHECK-NEXT: [[R_0:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[B:%.*]], <5 x float> [[TMP0]], <5 x float> [[B]])
958 ; CHECK-NEXT: [[R_1:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[R_0]], <5 x float> [[S1]], <5 x float> [[B]])
959 ; CHECK-NEXT: ret <5 x float> [[R_1]]
961 ; CHECK-NEXT: [[R_2:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[B]], <5 x float> [[S2]], <5 x float> [[B]])
962 ; CHECK-NEXT: [[R_3:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[R_2]], <5 x float> [[S3]], <5 x float> [[B]])
963 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
964 ; CHECK-NEXT: [[R_4:%.*]] = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> [[R_3]], <5 x float> [[TMP1]], <5 x float> [[B]])
965 ; CHECK-NEXT: ret <5 x float> [[R_4]]
968 %s0 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> zeroinitializer
969 %s1 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 1, i32 1, i32 1, i32 1, i32 4>
970 %s2 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 2, i32 2, i32 2, i32 2, i32 4>
971 %s3 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 4>
972 %s4 = shufflevector <8 x float> %a, <8 x float> poison, <5 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4>
973 br i1 %c, label %if.then, label %if.else
976 %r.0 = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> %b, <5 x float> %s0, <5 x float> %b)
977 %r.1 = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> %r.0, <5 x float> %s1, <5 x float> %b)
981 %r.2 = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> %b, <5 x float> %s2, <5 x float> %b)
982 %r.3 = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> %r.2, <5 x float> %s3, <5 x float> %b)
983 %r.4 = tail call fast <5 x float> @llvm.fmuladd.v5f32(<5 x float> %r.3, <5 x float> %s4, <5 x float> %b)
987 ; Regression test: this input used to run into an assertion in `areExtractShuffleVectors`.
; Both operands of the `sub` are zexts of scalable-vector (<vscale x 8 x i8>)
; splat shuffles. The CHECK lines expect the same instruction sequence as the
; input.
988 define <vscale x 8 x i16> @scalable_types_cannot_be_extract_shuffle() {
989 ; CHECK-LABEL: @scalable_types_cannot_be_extract_shuffle(
991 ; CHECK-NEXT: [[BROADCAST_SPLAT68:%.*]] = shufflevector <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
992 ; CHECK-NEXT: [[TMP0:%.*]] = zext <vscale x 8 x i8> [[BROADCAST_SPLAT68]] to <vscale x 8 x i16>
993 ; CHECK-NEXT: [[BROADCAST_SPLAT70:%.*]] = shufflevector <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
994 ; CHECK-NEXT: [[TMP1:%.*]] = zext <vscale x 8 x i8> [[BROADCAST_SPLAT70]] to <vscale x 8 x i16>
995 ; CHECK-NEXT: [[TMP2:%.*]] = sub <vscale x 8 x i16> [[TMP0]], [[TMP1]]
996 ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
999 %broadcast.splat68 = shufflevector <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
1000 %0 = zext <vscale x 8 x i8> %broadcast.splat68 to <vscale x 8 x i16>
1001 %broadcast.splat70 = shufflevector <vscale x 8 x i8> zeroinitializer, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
1002 %1 = zext <vscale x 8 x i8> %broadcast.splat70 to <vscale x 8 x i16>
1003 %2 = sub <vscale x 8 x i16> %0, %1
1004 ret <vscale x 8 x i16> %2