; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize,simplifycfg,instcombine -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize,simplifycfg,instcombine -force-vector-interleave=2 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s --check-prefix=INTERLEAVE

target triple = "aarch64-unknown-linux-gnu"
; A call whose argument can remain a scalar for a vectorized function variant
; with a uniform argument, because the argument is loop invariant.
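;
; In C terms the loop below is roughly the following (hypothetical source,
; reconstructed from the IR; names are illustrative only):
;
;   for (i = 0; i < n; i++)
;     dst[i] = foo(src[i], uniform);
;
; The checks verify that the call is widened to the masked variant
; @foo_uniform, that the loop-invariant %uniform is passed as a scalar, and
; that the active lane mask is passed as the trailing predicate argument.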
define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
; INTERLEAVE-LABEL: define void @test_uniform
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP6]])
; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 1
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 [[N]])
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP12:%.*]] = shl i64 [[TMP11]], 1
; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 [[TMP12]]
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[TMP14:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x double> @foo_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD3]], i64 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 1
; INTERLEAVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP18]]
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[TMP16]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP15]], ptr [[TMP19]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; INTERLEAVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP21:%.*]] = shl i64 [[TMP20]], 1
; INTERLEAVE-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], [[TMP21]]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP22]], i64 [[TMP7]])
; INTERLEAVE-NEXT: [[TMP23:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; INTERLEAVE: for.cond.cleanup:
; INTERLEAVE-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @foo(double %data, i64 %uniform) #1
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
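
; As above, but the uniform argument is a narrower scalar (i32) than the i64
; induction variable, so the matched variant must take an i32 uniform argument.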
define void @test_uniform_smaller_scalar(ptr noalias %dst, ptr readonly %src, i32 %uniform, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform_smaller_scalar
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP5]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP6]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; CHECK-NEXT: br i1 [[TMP8]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
; INTERLEAVE-LABEL: define void @test_uniform_smaller_scalar
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i32 [[UNIFORM:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
; INTERLEAVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 2
; INTERLEAVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP3]])
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
; INTERLEAVE-NEXT: [[TMP7:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 [[TMP6]])
; INTERLEAVE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP9:%.*]] = shl i64 [[TMP8]], 1
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP9]], i64 [[N]])
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT4:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP12:%.*]] = shl i64 [[TMP11]], 1
; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 [[TMP12]]
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP10]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP13]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]], <vscale x 2 x double> poison)
; INTERLEAVE-NEXT: [[TMP14:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: [[TMP15:%.*]] = call <vscale x 2 x double> @bar_uniform(<vscale x 2 x double> [[WIDE_MASKED_LOAD3]], i32 [[UNIFORM]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP18:%.*]] = shl i64 [[TMP17]], 1
; INTERLEAVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP18]]
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[TMP16]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP15]], ptr [[TMP19]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK2]])
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; INTERLEAVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
; INTERLEAVE-NEXT: [[TMP21:%.*]] = shl i64 [[TMP20]], 1
; INTERLEAVE-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], [[TMP21]]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP4]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT4]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP22]], i64 [[TMP7]])
; INTERLEAVE-NEXT: [[TMP23:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP23]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
; INTERLEAVE: for.cond.cleanup:
; INTERLEAVE-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @bar(double %data, i32 %uniform) #2
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; If the parameter is not uniform, then we can't use the vector variant.
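; In the non-interleaved run (CHECK) the loop is left in scalar form; in the
; interleaved run (INTERLEAVE) it is tail-folded at VF=1 and each active lane
; calls the scalar @foo from a predicated block.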
define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
; CHECK-LABEL: define void @test_uniform_not_invariant
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
; CHECK-NEXT: [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
; INTERLEAVE-LABEL: define void @test_uniform_not_invariant
; INTERLEAVE-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; INTERLEAVE-NEXT: entry:
; INTERLEAVE-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2)
; INTERLEAVE-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[N]], i64 2)
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <1 x i1> @llvm.get.active.lane.mask.v1i1.i64(i64 0, i64 [[N]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = call <1 x i1> @llvm.get.active.lane.mask.v1i1.i64(i64 1, i64 [[N]])
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <1 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <1 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT5:%.*]], [[PRED_STORE_CONTINUE4]] ]
; INTERLEAVE-NEXT: [[TMP2:%.*]] = extractelement <1 x i1> [[ACTIVE_LANE_MASK]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; INTERLEAVE: pred.store.if:
; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8
; INTERLEAVE-NEXT: [[TMP5:%.*]] = call double @foo(double [[TMP4]], i64 [[INDEX]]) #[[ATTR5:[0-9]+]]
; INTERLEAVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; INTERLEAVE-NEXT: store double [[TMP5]], ptr [[TMP6]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE]]
; INTERLEAVE: pred.store.continue:
; INTERLEAVE-NEXT: [[TMP7:%.*]] = extractelement <1 x i1> [[ACTIVE_LANE_MASK2]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE: pred.store.if3:
; INTERLEAVE-NEXT: [[TMP8:%.*]] = or disjoint i64 [[INDEX]], 1
; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP8]]
; INTERLEAVE-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8
; INTERLEAVE-NEXT: [[TMP11:%.*]] = call double @foo(double [[TMP10]], i64 [[TMP8]]) #[[ATTR5]]
; INTERLEAVE-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[TMP8]]
; INTERLEAVE-NEXT: store double [[TMP11]], ptr [[TMP12]], align 8
; INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE4]]
; INTERLEAVE: pred.store.continue4:
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 1
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <1 x i1> @llvm.get.active.lane.mask.v1i1.i64(i64 [[INDEX]], i64 [[TMP0]])
; INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call <1 x i1> @llvm.get.active.lane.mask.v1i1.i64(i64 [[TMP13]], i64 [[TMP1]])
; INTERLEAVE-NEXT: [[TMP14:%.*]] = extractelement <1 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
; INTERLEAVE-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP4:![0-9]+]]
; INTERLEAVE: for.cond.cleanup:
; INTERLEAVE-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @foo(double %data, i64 %indvars.iv) #1
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
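
; Scalar functions called from the loops above.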
declare double @foo(double, i64)
declare double @bar(double, i32)
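
; Masked vector variants: a <vscale x 2 x double> data operand, the original
; scalar uniform argument, and a trailing <vscale x 2 x i1> predicate.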
declare <vscale x 2 x double> @foo_uniform(<vscale x 2 x double>, i64, <vscale x 2 x i1>)
declare <vscale x 2 x double> @bar_uniform(<vscale x 2 x double>, i32, <vscale x 2 x i1>)

attributes #0 = { "target-features"="+sve" }
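
; The "vector-function-abi-variant" strings map each scalar callee to its SVE
; variant. Reading "_ZGVsMxvu_foo(foo_uniform)" roughly: 's' selects the SVE
; vector ABI, 'M' means the variant is masked, 'x' means a scalable
; (unspecified) VF, 'v' marks the first parameter as a vector parameter, 'u'
; marks the second as uniform (passed as a scalar), and @foo is redirected to
; the vector function @foo_uniform.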
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxvu_foo(foo_uniform)" }
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsMxvu_bar(bar_uniform)" }