1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s
; Data layout shared by every function in this test.
; NOTE(review): the v128:128:128 entry presumably accounts for the align 16
; expected on lane 0 of unannotated 128-bit vector accesses - confirm against
; the LLVM LangRef data layout rules.
3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; External function taking and returning <4 x float>; @f1 calls it with a
; vector argument.
5 declare <4 x float> @ext(<4 x float>)
; Zero-initialized <4 x float> global; not referenced in the portion of the
; file reviewed here.
6 @g = global <4 x float> zeroinitializer
; @f1: loop that loads a <4 x float>, mixes it with the loop-carried %acc
; through <2 x double> bitcasts and shuffles, adds, calls @ext, then
; compares/selects and stores the result back through the same pointer.
; The assertions expect per-lane float phis, loads, fadds, fcmps, selects and
; stores; only the call to @ext keeps vector operands.
8 define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
11 ; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i32 0
12 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i32 1
13 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i32 2
14 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i32 3
15 ; CHECK-NEXT: br label [[LOOP:%.*]]
17 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
18 ; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
19 ; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
20 ; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
21 ; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
22 ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
23 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
24 ; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
; Lane alignments expected for the unannotated vector load: 16, 4, 8, 4.
25 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
26 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
27 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
28 ; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
29 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
30 ; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
31 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
; No bitcast or shufflevector is expected in the output: result lanes 0-1 add
; loaded lanes (0+2, 1+3) and result lanes 2-3 add accumulator lanes (0+2, 1+3).
32 ; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
33 ; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
34 ; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
35 ; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
; The call to @ext stays a vector call, so its argument is rebuilt with
; insertelement and its result is taken apart again with extractelement.
36 ; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i32 0
37 ; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i32 1
38 ; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i32 2
39 ; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i32 3
40 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
41 ; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
42 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
43 ; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
44 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
45 ; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
46 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
47 ; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
48 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
49 ; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
50 ; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
51 ; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
52 ; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; The scalar stores reuse the lane pointers computed for the loads.
53 ; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16
54 ; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4
55 ; CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8
56 ; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4
57 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
58 ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
60 ; CHECK-NEXT: ret void
; Input IR. %acc is the loop-carried vector; its back-edge value is %sel.
66 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
67 %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
68 %nexti = sub i32 %i, 1
70 %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
71 %val = load <4 x float> , <4 x float> *%ptr
; Reinterpret both <4 x float> values as <2 x double> so that each shuffle
; element below corresponds to a pair of floats.
72 %dval = bitcast <4 x float> %val to <2 x double>
73 %dacc = bitcast <4 x float> %acc to <2 x double>
; Mask <0, 2>: element 0 of %dval followed by element 0 of %dacc.
74 %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
75 <2 x i32> <i32 0, i32 2>
; Mask <1, 3>: element 1 of %dval followed by element 1 of %dacc.
76 %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
77 <2 x i32> <i32 1, i32 3>
78 %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
79 %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
80 %add = fadd <4 x float> %f1, %f2
81 %call = call <4 x float> @ext(<4 x float> %add)
82 %cmp = fcmp ogt <4 x float> %call,
83 <float 1.0, float 2.0, float 3.0, float 4.0>
84 %sel = select <4 x i1> %cmp, <4 x float> %call,
85 <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
86 store <4 x float> %sel, <4 x float> *%ptr
; Branches back to %loop when %nexti is 0, otherwise leaves through %exit.
88 %test = icmp eq i32 %nexti, 0
89 br i1 %test, label %loop, label %exit
; @f2: loop over <4 x i8> data: load, sext to <4 x i32>, add the loop-carried
; %acc, compare against per-lane constants, select between the sum and a splat
; of %i, trunc back to <4 x i8> and store through the same pointer.
; The assertions expect every one of these steps as four scalar operations.
95 define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
98 ; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i32 0
99 ; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i32 1
100 ; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i32 2
101 ; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i32 3
102 ; CHECK-NEXT: br label [[LOOP:%.*]]
104 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
105 ; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
106 ; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
107 ; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
108 ; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
109 ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
110 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, <4 x i8>* [[BASE:%.*]], i32 [[I]]
111 ; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x i8>* [[PTR]] to i8*
; Lane alignments expected for the unannotated <4 x i8> load: 4, 1, 2, 1.
112 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, i8* [[PTR_I0]], align 4
113 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 1
114 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, i8* [[PTR_I1]], align 1
115 ; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 2
116 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i8, i8* [[PTR_I2]], align 2
117 ; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr i8, i8* [[PTR_I0]], i32 3
118 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i8, i8* [[PTR_I3]], align 1
119 ; CHECK-NEXT: [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
120 ; CHECK-NEXT: [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
121 ; CHECK-NEXT: [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
122 ; CHECK-NEXT: [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
123 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
124 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
125 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
126 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
127 ; CHECK-NEXT: [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
128 ; CHECK-NEXT: [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
129 ; CHECK-NEXT: [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
130 ; CHECK-NEXT: [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
; The splat of %i (%single / %limit in the input) is expected to vanish: each
; select uses the scalar loop counter directly as its false operand.
131 ; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
132 ; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
133 ; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
134 ; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
135 ; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
136 ; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
137 ; CHECK-NEXT: [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
138 ; CHECK-NEXT: [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
139 ; CHECK-NEXT: store i8 [[TRUNC_I0]], i8* [[PTR_I0]], align 4
140 ; CHECK-NEXT: store i8 [[TRUNC_I1]], i8* [[PTR_I1]], align 1
141 ; CHECK-NEXT: store i8 [[TRUNC_I2]], i8* [[PTR_I2]], align 2
142 ; CHECK-NEXT: store i8 [[TRUNC_I3]], i8* [[PTR_I3]], align 1
143 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
144 ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
146 ; CHECK-NEXT: ret void
; Input IR. %acc is the loop-carried <4 x i32>; its back-edge value is %sel
; (the untruncated select result).
152 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
153 %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
154 %nexti = sub i32 %i, 1
156 %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
157 %val = load <4 x i8> , <4 x i8> *%ptr
158 %ext = sext <4 x i8> %val to <4 x i32>
159 %add = add <4 x i32> %ext, %acc
160 %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
; Splat %i across all four lanes: insert it into lane 0, then shuffle with an
; all-zero mask so every result lane reads lane 0.
161 %single = insertelement <4 x i32> poison, i32 %i, i32 0
162 %limit = shufflevector <4 x i32> %single, <4 x i32> poison,
163 <4 x i32> zeroinitializer
164 %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
165 %trunc = trunc <4 x i32> %sel to <4 x i8>
166 store <4 x i8> %trunc, <4 x i8> *%ptr
; Branches back to %loop when %nexti is 0, otherwise leaves through %exit.
168 %test = icmp eq i32 %nexti, 0
169 br i1 %test, label %loop, label %exit
175 ; Check that !tbaa information is preserved.
; The input load is tagged !tbaa !1 and the input store !tbaa !2. The
; assertions expect one tag shared by all four scalar loads ([[TBAA0]]) and a
; second tag shared by all four scalar stores ([[TBAA3]]).
176 define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
; The destination lane pointers are expected ahead of the source ones.
178 ; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
179 ; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
180 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
181 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
182 ; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
183 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa [[TBAA0:![0-9]+]]
184 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
185 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa [[TBAA0]]
186 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
187 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa [[TBAA0]]
188 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
189 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa [[TBAA0]]
190 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
191 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
192 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
193 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
194 ; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa [[TBAA3:![0-9]+]]
195 ; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa [[TBAA3]]
196 ; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa [[TBAA3]]
197 ; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa [[TBAA3]]
198 ; CHECK-NEXT: ret void
; Input IR: load, double each lane (%val + %val), store.
200 %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
201 %add = add <4 x i32> %val, %val
202 store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
206 ; Check that !tbaa.struct information is preserved.
; Both the input load and the input store carry !tbaa.struct !5; the
; assertions expect !tbaa.struct !5 on each of the eight scalar accesses.
207 define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
209 ; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
210 ; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
211 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
212 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
213 ; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
214 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16, !tbaa.struct !5
215 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
216 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4, !tbaa.struct !5
217 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
218 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8, !tbaa.struct !5
219 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
220 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4, !tbaa.struct !5
221 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
222 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
223 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
224 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
225 ; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16, !tbaa.struct !5
226 ; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4, !tbaa.struct !5
227 ; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8, !tbaa.struct !5
228 ; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4, !tbaa.struct !5
229 ; CHECK-NEXT: ret void
; Input IR: load, double each lane (%val + %val), store.
231 %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
232 %add = add <4 x i32> %val, %val
233 store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
237 ; Check that llvm.access.group information is preserved.
; The input load and store carry !llvm.access.group !13 and the back-edge
; branch carries !llvm.loop !3. The assertions expect !llvm.access.group !6 on
; every scalar access and an !llvm.loop attachment kept on the branch.
; NOTE(review): the metadata numbers differ between input and expected output,
; presumably because metadata is renumbered when printed - the metadata
; definitions are outside the lines reviewed here, so confirm there.
238 define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
241 ; CHECK-NEXT: br label [[LOOP:%.*]]
243 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
; All lane pointers for both source and destination are expected before the
; first scalar load.
244 ; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[SRC:%.*]], i32 [[INDEX]]
245 ; CHECK-NEXT: [[THIS_SRC_I0:%.*]] = bitcast <4 x i32>* [[THIS_SRC]] to i32*
246 ; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 1
247 ; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 2
248 ; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, i32* [[THIS_SRC_I0]], i32 3
249 ; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, <4 x i32>* [[DST:%.*]], i32 [[INDEX]]
250 ; CHECK-NEXT: [[THIS_DST_I0:%.*]] = bitcast <4 x i32>* [[THIS_DST]] to i32*
251 ; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 1
252 ; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 2
253 ; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, i32* [[THIS_DST_I0]], i32 3
254 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[THIS_SRC_I0]], align 16, !llvm.access.group !6
255 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[THIS_SRC_I1]], align 4, !llvm.access.group !6
256 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[THIS_SRC_I2]], align 8, !llvm.access.group !6
257 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[THIS_SRC_I3]], align 4, !llvm.access.group !6
258 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
259 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
260 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
261 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
262 ; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[THIS_DST_I0]], align 16, !llvm.access.group !6
263 ; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[THIS_DST_I1]], align 4, !llvm.access.group !6
264 ; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[THIS_DST_I2]], align 8, !llvm.access.group !6
265 ; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[THIS_DST_I3]], align 4, !llvm.access.group !6
266 ; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1
267 ; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
268 ; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
270 ; CHECK-NEXT: ret void
; Input IR. %index starts at 0 on entry from %entry.
276 %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
277 %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
278 %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
279 %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.access.group !13
280 %add = add <4 x i32> %val, %val
281 store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.access.group !13
; The index steps by -1 and the loop repeats while it differs from %count.
282 %next_index = add i32 %index, -1
283 %continue = icmp ne i32 %next_index, %count
284 br i1 %continue, label %loop, label %end, !llvm.loop !3
290 ; Check that fpmath information is preserved.
; The assertions expect !fpmath !9 on each of the four scalar fadds, and the
; lanes reassembled with insertelement because the function returns a vector.
; NOTE(review): the input fadd below ends with a comma; its metadata
; attachment sits on a continuation line that is not part of the lines
; reviewed here.
291 define <4 x float> @f6(<4 x float> %x) {
293 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
294 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
295 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i32 1
296 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
297 ; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i32 2
298 ; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
299 ; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i32 3
300 ; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
301 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i32 0
302 ; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
303 ; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i32 2
304 ; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i32 3
305 ; CHECK-NEXT: ret <4 x float> [[RES]]
; Input IR: add the per-lane constants 1.0, 2.0, 3.0, 4.0 to %x.
307 %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
312 ; Check that random metadata isn't kept.
; The input load and store carry a !foo !5 attachment; the expected scalar
; loads and stores carry no metadata at all (only their alignment).
313 define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
315 ; CHECK-NEXT: [[DST_I0:%.*]] = bitcast <4 x i32>* [[DST:%.*]] to i32*
316 ; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 1
317 ; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 2
318 ; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, i32* [[DST_I0]], i32 3
319 ; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x i32>* [[SRC:%.*]] to i32*
320 ; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, i32* [[SRC_I0]], align 16
321 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 1
322 ; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, i32* [[SRC_I1]], align 4
323 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 2
324 ; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, i32* [[SRC_I2]], align 8
325 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, i32* [[SRC_I0]], i32 3
326 ; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, i32* [[SRC_I3]], align 4
327 ; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
328 ; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
329 ; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
330 ; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
331 ; CHECK-NEXT: store i32 [[ADD_I0]], i32* [[DST_I0]], align 16
332 ; CHECK-NEXT: store i32 [[ADD_I1]], i32* [[DST_I1]], align 4
333 ; CHECK-NEXT: store i32 [[ADD_I2]], i32* [[DST_I2]], align 8
334 ; CHECK-NEXT: store i32 [[ADD_I3]], i32* [[DST_I3]], align 4
335 ; CHECK-NEXT: ret void
; Input IR: load, double each lane (%val + %val), store.
337 %val = load <4 x i32> , <4 x i32> *%src, !foo !5
338 %add = add <4 x i32> %val, %val
339 store <4 x i32> %add, <4 x i32> *%dst, !foo !5
343 ; Test GEP with vectors.
; Lanes 0 and 2 of the index vector are overwritten with the constant 100 and
; lane 1 of the pointer vector with %other, so the expected scalar GEPs are:
;   lane 0: ptr0[0] + 100      lane 1: %other + i0[1]
;   lane 2: ptr0[2] + 100      lane 3: ptr0[3] + i0[3]
; Overwritten lanes (ptr0[1], i0[0], i0[2]) are expected never to be extracted.
; NOTE(review): %other is presumably a parameter declared on a continuation of
; the define line that is not part of the lines reviewed here.
344 define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
346 ; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
347 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
348 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
349 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
350 ; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x float*> [[PTR0:%.*]], i32 0
351 ; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x float*> [[PTR0]], i32 2
352 ; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x float*> [[PTR0]], i32 3
353 ; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i32 1
354 ; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i32 3
355 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, float* [[PTR0_I0]], i32 100
356 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, float* [[OTHER:%.*]], i32 [[I0_I1]]
357 ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, float* [[PTR0_I2]], i32 100
358 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, float* [[PTR0_I3]], i32 [[I0_I3]]
; Lane alignments expected for the unannotated <4 x float*> store: 32, 8, 16, 8.
359 ; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32
360 ; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8
361 ; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16
362 ; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8
363 ; CHECK-NEXT: ret void
; Input IR: build the index vector (%i2) and pointer vector (%ptr1), then do a
; single vector-of-pointers GEP and store its result.
366 %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
367 %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
368 %ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
369 %val = getelementptr float, <4 x float *> %ptr1, <4 x i32> %i2
370 store <4 x float *> %val, <4 x float *> *%dest
374 ; Test the handling of unaligned loads.
; The input load is annotated align 4 and the input store align 8 (both below
; the align 16 expected for lane 0 of unannotated <4 x float> accesses
; elsewhere in this file). Expected: all four scalar loads at align 4, and
; scalar stores alternating align 8, 4, 8, 4.
375 define void @f9(<4 x float> *%dest, <4 x float> *%src) {
377 ; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
378 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
379 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
380 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
381 ; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
382 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 4
383 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
384 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 4
385 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
386 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 4
387 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
388 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 4
389 ; CHECK-NEXT: store float [[VAL_I0]], float* [[DEST_I0]], align 8
390 ; CHECK-NEXT: store float [[VAL_I1]], float* [[DEST_I1]], align 4
391 ; CHECK-NEXT: store float [[VAL_I2]], float* [[DEST_I2]], align 8
392 ; CHECK-NEXT: store float [[VAL_I3]], float* [[DEST_I3]], align 4
393 ; CHECK-NEXT: ret void
; Input IR: plain vector copy from %src to %dest with explicit alignments.
395 %val = load <4 x float> , <4 x float> *%src, align 4
396 store <4 x float> %val, <4 x float> *%dest, align 8
400 ; ...and again with subelement alignment.
; The input load is annotated align 1 and the input store align 2. Expected:
; every scalar load keeps align 1 and every scalar store keeps align 2, with
; no lane getting a larger alignment.
401 define void @f10(<4 x float> *%dest, <4 x float> *%src) {
403 ; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float>* [[DEST:%.*]] to float*
404 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, float* [[DEST_I0]], i32 1
405 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, float* [[DEST_I0]], i32 2
406 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, float* [[DEST_I0]], i32 3
407 ; CHECK-NEXT: [[SRC_I0:%.*]] = bitcast <4 x float>* [[SRC:%.*]] to float*
408 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[SRC_I0]], align 1
409 ; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, float* [[SRC_I0]], i32 1
410 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[SRC_I1]], align 1
411 ; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, float* [[SRC_I0]], i32 2
412 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[SRC_I2]], align 1
413 ; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, float* [[SRC_I0]], i32 3
414 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[SRC_I3]], align 1
415 ; CHECK-NEXT: store float [[VAL_I0]], float* [[DEST_I0]], align 2
416 ; CHECK-NEXT: store float [[VAL_I1]], float* [[DEST_I1]], align 2
417 ; CHECK-NEXT: store float [[VAL_I2]], float* [[DEST_I2]], align 2
418 ; CHECK-NEXT: store float [[VAL_I3]], float* [[DEST_I3]], align 2
419 ; CHECK-NEXT: ret void
; Input IR: plain vector copy from %src to %dest with explicit alignments.
421 %val = load <4 x float> , <4 x float> *%src, align 1
422 store <4 x float> %val, <4 x float> *%dest, align 2
426 ; Test that sub-byte loads aren't scalarized.
427 define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
429 ; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, <32 x i1>* [[SRC0:%.*]], i32 1
430 ; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, <32 x i1>* [[SRC0]], align 4
431 ; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i32 0
432 ; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i32 1
433 ; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i32 2
434 ; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i32 3
435 ; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i32 4
436 ; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i32 5
437 ; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i32 6
438 ; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i32 7
439 ; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i32 8
440 ; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i32 9
441 ; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i32 10
442 ; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i32 11
443 ; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i32 12
444 ; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i32 13
445 ; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i32 14
446 ; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i32 15
447 ; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i32 16
448 ; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i32 17
449 ; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i32 18
450 ; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i32 19
451 ; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i32 20
452 ; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i32 21
453 ; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i32 22
454 ; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i32 23
455 ; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i32 24
456 ; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i32 25
457 ; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i32 26
458 ; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i32 27
459 ; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i32 28
460 ; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i32 29
461 ; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i32 30
462 ; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i32 31
463 ; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, <32 x i1>* [[SRC1]], align 4
464 ; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i32 0
465 ; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
466 ; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i32 1
467 ; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
468 ; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i32 2
469 ; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
470 ; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i32 3
471 ; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
472 ; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i32 4
473 ; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
474 ; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i32 5
475 ; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
476 ; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i32 6
477 ; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
478 ; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i32 7
479 ; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
480 ; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i32 8
481 ; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
482 ; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i32 9
483 ; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
484 ; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i32 10
485 ; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
486 ; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i32 11
487 ; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
488 ; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i32 12
489 ; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
490 ; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i32 13
491 ; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
492 ; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i32 14
493 ; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
494 ; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i32 15
495 ; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
496 ; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i32 16
497 ; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
498 ; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i32 17
499 ; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
500 ; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i32 18
501 ; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
502 ; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i32 19
503 ; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
504 ; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i32 20
505 ; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
506 ; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i32 21
507 ; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
508 ; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i32 22
509 ; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
510 ; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i32 23
511 ; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
512 ; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i32 24
513 ; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
514 ; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i32 25
515 ; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
516 ; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i32 26
517 ; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
518 ; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i32 27
519 ; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
520 ; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i32 28
521 ; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
522 ; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i32 29
523 ; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
524 ; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i32 30
525 ; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
526 ; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i32 31
527 ; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
528 ; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i32 0
529 ; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i32 1
530 ; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i32 2
531 ; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i32 3
532 ; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i32 4
533 ; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i32 5
534 ; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i32 6
535 ; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i32 7
536 ; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i32 8
537 ; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i32 9
538 ; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i32 10
539 ; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i32 11
540 ; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i32 12
541 ; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i32 13
542 ; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i32 14
543 ; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i32 15
544 ; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i32 16
545 ; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i32 17
546 ; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i32 18
547 ; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i32 19
548 ; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i32 20
549 ; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i32 21
550 ; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i32 22
551 ; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i32 23
552 ; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i32 24
553 ; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i32 25
554 ; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i32 26
555 ; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i32 27
556 ; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i32 28
557 ; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i32 29
558 ; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i32 30
559 ; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i32 31
560 ; CHECK-NEXT: store <32 x i1> [[AND]], <32 x i1>* [[DEST:%.*]], align 4
561 ; CHECK-NEXT: ret void
563 %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
564 %val0 = load <32 x i1> , <32 x i1> *%src0
565 %val1 = load <32 x i1> , <32 x i1> *%src1
566 %and = and <32 x i1> %val0, %val1
567 store <32 x i1> %and, <32 x i1> *%dest
571 ; Test vector GEPs with more than one index.
; The base operand %ptr is a vector of four pointers to [4 x float], and the
; first index is the constant vector <0, 1, 2, 3>.  The expected output
; splits the GEP into four scalar "getelementptr inbounds" instructions: lane
; N uses element N of %ptr, the immediate N as its first index, and element N
; of %i as its last index.  The vector store to %dest is likewise expected to
; become four float* stores through DEST_I0..DEST_I3, with alignments 32, 8,
; 16 and 8.
572 define void @f13(<4 x float *> *%dest, <4 x [4 x float] *> %ptr, <4 x i32> %i,
574 ; CHECK-NEXT: [[DEST_I0:%.*]] = bitcast <4 x float*>* [[DEST:%.*]] to float**
575 ; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 1
576 ; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 2
577 ; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float*, float** [[DEST_I0]], i32 3
578 ; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i32 0
579 ; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x [4 x float]*> [[PTR:%.*]], i32 0
580 ; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I0]], i32 0, i32 [[I_I0]]
581 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i32 1
582 ; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 1
583 ; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I1]], i32 1, i32 [[I_I1]]
584 ; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i32 2
585 ; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 2
586 ; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I2]], i32 2, i32 [[I_I2]]
587 ; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i32 3
588 ; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x [4 x float]*> [[PTR]], i32 3
589 ; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[PTR_I3]], i32 3, i32 [[I_I3]]
590 ; CHECK-NEXT: store float* [[VAL_I0]], float** [[DEST_I0]], align 32
591 ; CHECK-NEXT: store float* [[VAL_I1]], float** [[DEST_I1]], align 8
592 ; CHECK-NEXT: store float* [[VAL_I2]], float** [[DEST_I2]], align 16
593 ; CHECK-NEXT: store float* [[VAL_I3]], float** [[DEST_I3]], align 8
594 ; CHECK-NEXT: ret void
; Vector GEP under test: a vector-of-pointers base with a constant vector as
; its first index.
597 %val = getelementptr inbounds [4 x float], <4 x [4 x float] *> %ptr,
598 <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
600 store <4 x float *> %val, <4 x float *> *%dest
604 ; Test combinations of vector and non-vector PHIs.
; The loop carries one <4 x float> phi (%this_acc) and one i32 phi
; (%this_count).  The expected output replaces the vector phi with four float
; phis placed before the untouched i32 phi.  Because %this_acc is passed
; whole to @ext, it is rebuilt with an insertelement chain ahead of the call;
; the call result is then extracted lane by lane to feed four scalar fadds,
; and %next_acc is rebuilt once more so it can be returned as a vector.
605 define <4 x float> @f14(<4 x float> %acc, i32 %count) {
608 ; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i32 0
609 ; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i32 1
610 ; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i32 2
611 ; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i32 3
612 ; CHECK-NEXT: br label [[LOOP:%.*]]
614 ; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
615 ; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
616 ; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
617 ; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
618 ; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
619 ; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i32 0
620 ; CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i32 1
621 ; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i32 2
622 ; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i32 3
623 ; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
624 ; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i32 0
625 ; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
626 ; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i32 1
627 ; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
628 ; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i32 2
629 ; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
630 ; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i32 3
631 ; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
632 ; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i32 0
633 ; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i32 1
634 ; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i32 2
635 ; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i32 3
636 ; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
637 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
638 ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
640 ; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]]
; Vector phi followed by a scalar phi in the same block.
646 %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
647 %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
; The call takes and returns a whole vector.
648 %foo = call <4 x float> @ext(<4 x float> %this_acc)
649 %next_acc = fadd <4 x float> %this_acc, %foo
650 %next_count = sub i32 %this_count, 1
651 %cmp = icmp eq i32 %next_count, 0
652 br i1 %cmp, label %loop, label %exit
655 ret <4 x float> %next_acc
658 ; Test unary operator scalarization.
; A <4 x float> is loaded from %ptr, negated with fneg, passed to @ext,
; compared against <1.0, 2.0, 3.0, 4.0>, selected against <5.0, 6.0, 7.0, 8.0>
; and stored back to %ptr.  The expected output has four scalar fneg
; instructions whose results are reassembled with an insertelement chain for
; the vector call, followed by per-lane fcmp/select using the matching
; constant of each vector.  The vector load and store are expected as four
; float accesses with alignments 16, 4, 8 and 4.
659 define void @f15(<4 x float> %init, <4 x float> *%base, i32 %count) {
662 ; CHECK-NEXT: br label [[LOOP:%.*]]
664 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
665 ; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
666 ; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, <4 x float>* [[BASE:%.*]], i32 [[I]]
667 ; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <4 x float>* [[PTR]] to float*
668 ; CHECK-NEXT: [[VAL_I0:%.*]] = load float, float* [[PTR_I0]], align 16
669 ; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, float* [[PTR_I0]], i32 1
670 ; CHECK-NEXT: [[VAL_I1:%.*]] = load float, float* [[PTR_I1]], align 4
671 ; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, float* [[PTR_I0]], i32 2
672 ; CHECK-NEXT: [[VAL_I2:%.*]] = load float, float* [[PTR_I2]], align 8
673 ; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, float* [[PTR_I0]], i32 3
674 ; CHECK-NEXT: [[VAL_I3:%.*]] = load float, float* [[PTR_I3]], align 4
675 ; CHECK-NEXT: [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
676 ; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
677 ; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
678 ; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
679 ; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i32 0
680 ; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i32 1
681 ; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i32 2
682 ; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i32 3
683 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
684 ; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i32 0
685 ; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
686 ; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i32 1
687 ; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
688 ; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i32 2
689 ; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
690 ; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i32 3
691 ; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
692 ; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
693 ; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
694 ; CHECK-NEXT: [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
695 ; CHECK-NEXT: [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
696 ; CHECK-NEXT: store float [[SEL_I0]], float* [[PTR_I0]], align 16
697 ; CHECK-NEXT: store float [[SEL_I1]], float* [[PTR_I1]], align 4
698 ; CHECK-NEXT: store float [[SEL_I2]], float* [[PTR_I2]], align 8
699 ; CHECK-NEXT: store float [[SEL_I3]], float* [[PTR_I3]], align 4
700 ; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
701 ; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
703 ; CHECK-NEXT: ret void
709 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
; NOTE(review): no instruction visible in this function uses %acc, and the
; expected output above contains no phi for it (nor any extract of %init);
; presumably it is removed by the dce pass that the test's opt invocation
; runs after the scalarizer -- confirm.
710 %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
711 %nexti = sub i32 %i, 1
713 %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
714 %val = load <4 x float> , <4 x float> *%ptr
; Unary operator under test.
715 %neg = fneg <4 x float> %val
716 %call = call <4 x float> @ext(<4 x float> %neg)
717 %cmp = fcmp ogt <4 x float> %call,
718 <float 1.0, float 2.0, float 3.0, float 4.0>
719 %sel = select <4 x i1> %cmp, <4 x float> %call,
720 <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
721 store <4 x float> %sel, <4 x float> *%ptr
723 %test = icmp eq i32 %nexti, 0
724 br i1 %test, label %loop, label %exit
730 ; Check that IR flags are preserved.
; f16: the vector add carries "nuw nsw"; both scalar adds in the expected
; output must carry the same two flags before the result is rebuilt with
; insertelement.
731 define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
733 ; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
734 ; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
735 ; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
736 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
737 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
738 ; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
739 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
740 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
741 ; CHECK-NEXT: ret <2 x i32> [[RES]]
743 %res = add nuw nsw <2 x i32> %i, %j
; f17: the "exact" flag on a vector sdiv must appear on both scalar sdivs in
; the expected output.
746 define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
748 ; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0
749 ; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0
750 ; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
751 ; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1
752 ; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1
753 ; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
754 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i32 0
755 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1
756 ; CHECK-NEXT: ret <2 x i32> [[RES]]
758 %res = sdiv exact <2 x i32> %i, %j
; f18: the "fast" flag on a vector fadd must appear on both scalar fadds in
; the expected output.
761 define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
763 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
764 ; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
765 ; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
766 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
767 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
768 ; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
769 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
770 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
771 ; CHECK-NEXT: ret <2 x float> [[RES]]
773 %res = fadd fast <2 x float> %x, %y
; f19: the "fast" flag on a vector fneg (a unary operator) must appear on
; both scalar fnegs in the expected output.
776 define <2 x float> @f19(<2 x float> %x) {
778 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
779 ; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]]
780 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
781 ; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]]
782 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
783 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
784 ; CHECK-NEXT: ret <2 x float> [[RES]]
786 %res = fneg fast <2 x float> %x
; f20: the "fast" flag on a vector fcmp must appear on both scalar fcmps in
; the expected output; the i1 results are rebuilt into a <2 x i1>.
789 define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
791 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
792 ; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
793 ; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
794 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
795 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
796 ; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
797 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i32 0
798 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i32 1
799 ; CHECK-NEXT: ret <2 x i1> [[RES]]
801 %res = fcmp fast ogt <2 x float> %x, %y
; f21: a "fast" call to the vector intrinsic @llvm.sqrt.v2f32 is expected to
; become two "fast" calls to the scalar @llvm.sqrt.f32, one per lane.
804 declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
805 define <2 x float> @f21(<2 x float> %x) {
807 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
808 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
809 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
810 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
811 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
812 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
813 ; CHECK-NEXT: ret <2 x float> [[RES]]
815 %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
; f22: same as the sqrt case but with a three-operand intrinsic.  The "fast"
; call to @llvm.fma.v2f32 is expected to become two "fast" calls to
; @llvm.fma.f32, each taking the matching lane of %x, %y and %z.
818 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
819 define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
821 ; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
822 ; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i32 0
823 ; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i32 0
824 ; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
825 ; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i32 1
826 ; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i32 1
827 ; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i32 1
828 ; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
829 ; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i32 0
830 ; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i32 1
831 ; CHECK-NEXT: ret <2 x float> [[RES]]
833 %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
837 ; See https://reviews.llvm.org/D83101#2133062
; The input extracts lane 0 of %srcvec and builds a <2 x i32> from it and the
; scalar %v1 with an insertelement chain that starts from poison.  The
; expected output is the same extract followed by a two-step insertelement
; chain and a return of the rebuilt vector.
; NOTE(review): the "_crash" suffix and the review link suggest this is a
; regression test for a crash on this pattern -- confirm against the review.
838 define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
839 ; CHECK-LABEL: @f23_crash(
840 ; CHECK-NEXT: [[V0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i32 0
841 ; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[V0]], i32 0
842 ; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i32 1
843 ; CHECK-NEXT: ret <2 x i32> [[T1]]
845 %v0 = extractelement <2 x i32> %srcvec, i32 0
846 %t0 = insertelement <2 x i32> poison, i32 %v0, i32 0
847 %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
852 !1 = !{ !"set1", !0 }
853 !2 = !{ !"set2", !0 }
854 !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
856 !5 = !{ i64 0, i64 8, null }