1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
3 ; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
4 ; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
5 ; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
6 ; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
7 ; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
8 ; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
9 ; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
10 ; RUN: opt < %s -mcpu=corei7 -O3 -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
11 ; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 -enable-new-pm=1 | FileCheck %s --check-prefix=O1VEC2
13 ; This file tests the llvm.loop.vectorize.enable metadata forcing
14 ; vectorization even when optimization levels are too low, or when
15 ; vectorization is disabled.
17 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
18 target triple = "x86_64-unknown-linux-gnu"
20 define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
21 ; O1-LABEL: @enabled(
22 ; O1-NEXT: entry:
23 ; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
24 ; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
25 ; O1-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
26 ; O1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
27 ; O1-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
28 ; O1-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
29 ; O1-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
30 ; O1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
31 ; O1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
32 ; O1-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
33 ; O1-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
34 ; O1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
35 ; O1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
36 ; O1-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
37 ; O1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
38 ; O1-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
39 ; O1-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
40 ; O1-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
41 ; O1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
42 ; O1-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
43 ; O1-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
44 ; O1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
45 ; O1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
46 ; O1-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
47 ; O1-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
48 ; O1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
49 ; O1-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
50 ; O1-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
51 ; O1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
52 ; O1-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
53 ; O1-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
54 ; O1-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
55 ; O1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
56 ; O1-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
57 ; O1-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
58 ; O1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
59 ; O1-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
60 ; O1-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
61 ; O1-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
62 ; O1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
63 ; O1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
64 ; O1-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
65 ; O1-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
66 ; O1-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
67 ; O1-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
68 ; O1-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
69 ; O1-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
70 ; O1-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
71 ; O1-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
72 ; O1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
73 ; O1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
74 ; O1-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
75 ; O1-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
76 ; O1-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
77 ; O1-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
78 ; O1-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
79 ; O1-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
80 ; O1-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
81 ; O1-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
82 ; O1-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
83 ; O1-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
84 ; O1-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
85 ; O1-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
86 ; O1-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
87 ; O1-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
88 ; O1-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
89 ; O1-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
90 ; O1-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
91 ; O1-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
92 ; O1-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
93 ; O1-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
94 ; O1-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
95 ; O1-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
96 ; O1-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
97 ; O1-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
98 ; O1-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
99 ; O1-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
100 ; O1-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
101 ; O1-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
102 ; O1-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
103 ; O1-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
104 ; O1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
105 ; O1-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
106 ; O1-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
107 ; O1-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
108 ; O1-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
109 ; O1-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
110 ; O1-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
111 ; O1-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
112 ; O1-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
113 ; O1-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
114 ; O1-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
115 ; O1-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
116 ; O1-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
117 ; O1-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
118 ; O1-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
119 ; O1-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
120 ; O1-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
121 ; O1-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
122 ; O1-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
123 ; O1-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
124 ; O1-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
125 ; O1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
126 ; O1-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
127 ; O1-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
128 ; O1-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
129 ; O1-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
130 ; O1-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
131 ; O1-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
132 ; O1-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
133 ; O1-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
134 ; O1-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
135 ; O1-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
136 ; O1-NEXT: ret i32 [[TMP78]]
138 ; O2-LABEL: @enabled(
139 ; O2-NEXT: entry:
140 ; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
141 ; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
142 ; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
143 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
144 ; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
145 ; O2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
146 ; O2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
147 ; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
148 ; O2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
149 ; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
150 ; O2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
151 ; O2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
152 ; O2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
153 ; O2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
154 ; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
155 ; O2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
156 ; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
157 ; O2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
158 ; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
159 ; O2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
160 ; O2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
161 ; O2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
162 ; O2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
163 ; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
164 ; O2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
165 ; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
166 ; O2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
167 ; O2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
168 ; O2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
169 ; O2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
170 ; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
171 ; O2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
172 ; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
173 ; O2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
174 ; O2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
175 ; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
176 ; O2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
177 ; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
178 ; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
179 ; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
180 ; O2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
181 ; O2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
182 ; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
183 ; O2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
184 ; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
185 ; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
186 ; O2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
187 ; O2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
188 ; O2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
189 ; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
190 ; O2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
191 ; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
192 ; O2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
193 ; O2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
194 ; O2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
195 ; O2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
196 ; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
197 ; O2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
198 ; O2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
199 ; O2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
200 ; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
201 ; O2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
202 ; O2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
203 ; O2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
204 ; O2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
205 ; O2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
206 ; O2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
207 ; O2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
208 ; O2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
209 ; O2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
210 ; O2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
211 ; O2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
212 ; O2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
213 ; O2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
214 ; O2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
215 ; O2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
216 ; O2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
217 ; O2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
218 ; O2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
219 ; O2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
220 ; O2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
221 ; O2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
222 ; O2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
223 ; O2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
224 ; O2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
225 ; O2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
226 ; O2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
227 ; O2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
228 ; O2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
229 ; O2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
230 ; O2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
231 ; O2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
232 ; O2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
233 ; O2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
234 ; O2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
235 ; O2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
236 ; O2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
237 ; O2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
238 ; O2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
239 ; O2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
240 ; O2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
241 ; O2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
242 ; O2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
243 ; O2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
244 ; O2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
245 ; O2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
246 ; O2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
247 ; O2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
248 ; O2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
249 ; O2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
250 ; O2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
251 ; O2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
252 ; O2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
253 ; O2-NEXT: ret i32 [[TMP78]]
255 ; O3-LABEL: @enabled(
256 ; O3-NEXT: entry:
257 ; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
258 ; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
259 ; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
260 ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
261 ; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
262 ; O3-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
263 ; O3-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
264 ; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
265 ; O3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
266 ; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
267 ; O3-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
268 ; O3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
269 ; O3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
270 ; O3-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
271 ; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
272 ; O3-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
273 ; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
274 ; O3-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
275 ; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
276 ; O3-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
277 ; O3-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
278 ; O3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
279 ; O3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
280 ; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
281 ; O3-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
282 ; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
283 ; O3-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
284 ; O3-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
285 ; O3-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
286 ; O3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
287 ; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
288 ; O3-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
289 ; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
290 ; O3-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
291 ; O3-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
292 ; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
293 ; O3-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
294 ; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
295 ; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
296 ; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
297 ; O3-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
298 ; O3-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
299 ; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
300 ; O3-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
301 ; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
302 ; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
303 ; O3-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
304 ; O3-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
305 ; O3-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
306 ; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
307 ; O3-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
308 ; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
309 ; O3-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
310 ; O3-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
311 ; O3-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
312 ; O3-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
313 ; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
314 ; O3-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
315 ; O3-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
316 ; O3-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
317 ; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
318 ; O3-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
319 ; O3-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
320 ; O3-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
321 ; O3-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
322 ; O3-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
323 ; O3-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
324 ; O3-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
325 ; O3-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
326 ; O3-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
327 ; O3-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
328 ; O3-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
329 ; O3-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
330 ; O3-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
331 ; O3-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
332 ; O3-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
333 ; O3-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
334 ; O3-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
335 ; O3-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
336 ; O3-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
337 ; O3-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
338 ; O3-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
339 ; O3-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
340 ; O3-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
341 ; O3-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
342 ; O3-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
343 ; O3-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
344 ; O3-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
345 ; O3-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
346 ; O3-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
347 ; O3-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
348 ; O3-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
349 ; O3-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
350 ; O3-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
351 ; O3-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
352 ; O3-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
353 ; O3-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
354 ; O3-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
355 ; O3-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
356 ; O3-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
357 ; O3-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
358 ; O3-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
359 ; O3-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
360 ; O3-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
361 ; O3-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
362 ; O3-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
363 ; O3-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
364 ; O3-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
365 ; O3-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
366 ; O3-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
367 ; O3-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
368 ; O3-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
369 ; O3-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
370 ; O3-NEXT: ret i32 [[TMP78]]
372 ; O3DEFAULT-LABEL: @enabled(
373 ; O3DEFAULT-NEXT: entry:
374 ; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
375 ; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
376 ; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
377 ; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
378 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
379 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
380 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
381 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
382 ; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
383 ; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
384 ; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
385 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
386 ; O3DEFAULT-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
387 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
388 ; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
389 ; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
390 ; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
391 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
392 ; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
393 ; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
394 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
395 ; O3DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
396 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
397 ; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
398 ; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
399 ; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
400 ; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
401 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
402 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
403 ; O3DEFAULT-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
404 ; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
405 ; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
406 ; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
407 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
408 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
409 ; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
410 ; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
411 ; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
412 ; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
413 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
414 ; O3DEFAULT-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
415 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
416 ; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
417 ; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
418 ; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
419 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
420 ; O3DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
421 ; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
422 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
423 ; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
424 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
425 ; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
426 ; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
427 ; O3DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
428 ; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
429 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
430 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
431 ; O3DEFAULT-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
432 ; O3DEFAULT-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
433 ; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
434 ; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
435 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
436 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
437 ; O3DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
438 ; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
439 ; O3DEFAULT-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
440 ; O3DEFAULT-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
441 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
442 ; O3DEFAULT-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
443 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
444 ; O3DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
445 ; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
446 ; O3DEFAULT-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
447 ; O3DEFAULT-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
448 ; O3DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
449 ; O3DEFAULT-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
450 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
451 ; O3DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
452 ; O3DEFAULT-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
453 ; O3DEFAULT-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
454 ; O3DEFAULT-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
455 ; O3DEFAULT-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
456 ; O3DEFAULT-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
457 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
458 ; O3DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
459 ; O3DEFAULT-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
460 ; O3DEFAULT-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
461 ; O3DEFAULT-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
462 ; O3DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
463 ; O3DEFAULT-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
464 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
465 ; O3DEFAULT-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
466 ; O3DEFAULT-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
467 ; O3DEFAULT-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
468 ; O3DEFAULT-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
469 ; O3DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
470 ; O3DEFAULT-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
471 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
472 ; O3DEFAULT-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
473 ; O3DEFAULT-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
474 ; O3DEFAULT-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
475 ; O3DEFAULT-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
476 ; O3DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
477 ; O3DEFAULT-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
478 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
479 ; O3DEFAULT-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
480 ; O3DEFAULT-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
481 ; O3DEFAULT-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
482 ; O3DEFAULT-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
483 ; O3DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
484 ; O3DEFAULT-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
485 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
486 ; O3DEFAULT-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
487 ; O3DEFAULT-NEXT: ret i32 [[TMP78]]
489 ; Os-LABEL: @enabled(
490 ; Os-NEXT: entry:
491 ; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
492 ; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
493 ; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
494 ; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
495 ; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
496 ; Os-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
497 ; Os-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
498 ; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
499 ; Os-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
500 ; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
501 ; Os-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
502 ; Os-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
503 ; Os-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
504 ; Os-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
505 ; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
506 ; Os-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
507 ; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
508 ; Os-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
509 ; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
510 ; Os-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
511 ; Os-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
512 ; Os-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
513 ; Os-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
514 ; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
515 ; Os-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
516 ; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
517 ; Os-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
518 ; Os-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
519 ; Os-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
520 ; Os-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
521 ; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
522 ; Os-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
523 ; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
524 ; Os-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
525 ; Os-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
526 ; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
527 ; Os-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
528 ; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
529 ; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
530 ; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
531 ; Os-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
532 ; Os-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
533 ; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
534 ; Os-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
535 ; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
536 ; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
537 ; Os-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
538 ; Os-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
539 ; Os-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
540 ; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
541 ; Os-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
542 ; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
543 ; Os-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
544 ; Os-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
545 ; Os-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
546 ; Os-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
547 ; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
548 ; Os-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
549 ; Os-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
550 ; Os-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
551 ; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
552 ; Os-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
553 ; Os-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
554 ; Os-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
555 ; Os-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
556 ; Os-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
557 ; Os-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
558 ; Os-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
559 ; Os-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
560 ; Os-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
561 ; Os-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
562 ; Os-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
563 ; Os-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
564 ; Os-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
565 ; Os-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
566 ; Os-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
567 ; Os-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
568 ; Os-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
569 ; Os-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
570 ; Os-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
571 ; Os-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
572 ; Os-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
573 ; Os-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
574 ; Os-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
575 ; Os-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
576 ; Os-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
577 ; Os-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
578 ; Os-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
579 ; Os-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
580 ; Os-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
581 ; Os-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
582 ; Os-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
583 ; Os-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
584 ; Os-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
585 ; Os-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
586 ; Os-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
587 ; Os-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
588 ; Os-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
589 ; Os-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
590 ; Os-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
591 ; Os-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
592 ; Os-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
593 ; Os-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
594 ; Os-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
595 ; Os-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
596 ; Os-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
597 ; Os-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
598 ; Os-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
599 ; Os-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
600 ; Os-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
601 ; Os-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
602 ; Os-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
603 ; Os-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
604 ; Os-NEXT: ret i32 [[TMP78]]
606 ; Oz-LABEL: @enabled(
607 ; Oz-NEXT: entry:
608 ; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
609 ; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
610 ; Oz-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
611 ; Oz-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
612 ; Oz-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
613 ; Oz-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
614 ; Oz-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
615 ; Oz-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
616 ; Oz-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
617 ; Oz-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
618 ; Oz-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
619 ; Oz-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
620 ; Oz-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
621 ; Oz-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
622 ; Oz-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
623 ; Oz-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
624 ; Oz-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
625 ; Oz-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
626 ; Oz-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
627 ; Oz-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
628 ; Oz-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
629 ; Oz-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
630 ; Oz-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
631 ; Oz-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
632 ; Oz-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
633 ; Oz-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
634 ; Oz-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
635 ; Oz-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
636 ; Oz-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
637 ; Oz-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
638 ; Oz-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
639 ; Oz-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
640 ; Oz-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
641 ; Oz-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
642 ; Oz-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
643 ; Oz-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
644 ; Oz-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
645 ; Oz-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
646 ; Oz-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
647 ; Oz-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
648 ; Oz-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
649 ; Oz-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
650 ; Oz-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
651 ; Oz-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
652 ; Oz-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
653 ; Oz-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
654 ; Oz-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
655 ; Oz-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
656 ; Oz-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
657 ; Oz-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
658 ; Oz-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
659 ; Oz-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
660 ; Oz-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
661 ; Oz-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
662 ; Oz-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
663 ; Oz-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
664 ; Oz-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
665 ; Oz-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
666 ; Oz-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
667 ; Oz-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
668 ; Oz-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
669 ; Oz-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
670 ; Oz-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
671 ; Oz-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
672 ; Oz-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
673 ; Oz-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
674 ; Oz-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
675 ; Oz-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
676 ; Oz-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
677 ; Oz-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
678 ; Oz-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
679 ; Oz-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
680 ; Oz-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
681 ; Oz-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
682 ; Oz-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
683 ; Oz-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
684 ; Oz-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
685 ; Oz-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
686 ; Oz-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
687 ; Oz-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
688 ; Oz-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
689 ; Oz-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
690 ; Oz-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
691 ; Oz-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
692 ; Oz-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
693 ; Oz-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
694 ; Oz-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
695 ; Oz-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
696 ; Oz-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
697 ; Oz-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
698 ; Oz-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
699 ; Oz-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
700 ; Oz-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
701 ; Oz-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
702 ; Oz-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
703 ; Oz-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
704 ; Oz-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
705 ; Oz-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
706 ; Oz-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
707 ; Oz-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
708 ; Oz-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
709 ; Oz-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
710 ; Oz-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
711 ; Oz-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
712 ; Oz-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
713 ; Oz-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
714 ; Oz-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
715 ; Oz-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
716 ; Oz-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
717 ; Oz-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
718 ; Oz-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
719 ; Oz-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
720 ; Oz-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
721 ; Oz-NEXT: ret i32 [[TMP78]]
723 ; O1VEC2-LABEL: @enabled(
724 ; O1VEC2-NEXT: entry:
725 ; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
726 ; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
727 ; O1VEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
728 ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
729 ; O1VEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
730 ; O1VEC2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
731 ; O1VEC2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
732 ; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
733 ; O1VEC2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
734 ; O1VEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
735 ; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
736 ; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
737 ; O1VEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
738 ; O1VEC2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
739 ; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
740 ; O1VEC2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
741 ; O1VEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
742 ; O1VEC2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
743 ; O1VEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
744 ; O1VEC2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
745 ; O1VEC2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
746 ; O1VEC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
747 ; O1VEC2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
748 ; O1VEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
749 ; O1VEC2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
750 ; O1VEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
751 ; O1VEC2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
752 ; O1VEC2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
753 ; O1VEC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
754 ; O1VEC2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
755 ; O1VEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
756 ; O1VEC2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
757 ; O1VEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
758 ; O1VEC2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
759 ; O1VEC2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
760 ; O1VEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
761 ; O1VEC2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
762 ; O1VEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
763 ; O1VEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
764 ; O1VEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
765 ; O1VEC2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
766 ; O1VEC2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
767 ; O1VEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
768 ; O1VEC2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
769 ; O1VEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
770 ; O1VEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
771 ; O1VEC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
772 ; O1VEC2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
773 ; O1VEC2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
774 ; O1VEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
775 ; O1VEC2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
776 ; O1VEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
777 ; O1VEC2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
778 ; O1VEC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
779 ; O1VEC2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
780 ; O1VEC2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
781 ; O1VEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
782 ; O1VEC2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
783 ; O1VEC2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
784 ; O1VEC2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
785 ; O1VEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
786 ; O1VEC2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
787 ; O1VEC2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
788 ; O1VEC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
789 ; O1VEC2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
790 ; O1VEC2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
791 ; O1VEC2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
792 ; O1VEC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
793 ; O1VEC2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
794 ; O1VEC2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
795 ; O1VEC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
796 ; O1VEC2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
797 ; O1VEC2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
798 ; O1VEC2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
799 ; O1VEC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
800 ; O1VEC2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
801 ; O1VEC2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
802 ; O1VEC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
803 ; O1VEC2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
804 ; O1VEC2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
805 ; O1VEC2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
806 ; O1VEC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
807 ; O1VEC2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
808 ; O1VEC2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
809 ; O1VEC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
810 ; O1VEC2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
811 ; O1VEC2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
812 ; O1VEC2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
813 ; O1VEC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
814 ; O1VEC2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
815 ; O1VEC2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
816 ; O1VEC2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
817 ; O1VEC2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
818 ; O1VEC2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
819 ; O1VEC2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
820 ; O1VEC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
821 ; O1VEC2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
822 ; O1VEC2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
823 ; O1VEC2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
824 ; O1VEC2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
825 ; O1VEC2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
826 ; O1VEC2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
827 ; O1VEC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
828 ; O1VEC2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
829 ; O1VEC2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
830 ; O1VEC2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
831 ; O1VEC2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
832 ; O1VEC2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
833 ; O1VEC2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
834 ; O1VEC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
835 ; O1VEC2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
836 ; O1VEC2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
837 ; O1VEC2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
838 ; O1VEC2-NEXT: ret i32 [[TMP78]]
840 ; OzVEC2-LABEL: @enabled(
841 ; OzVEC2-NEXT: entry:
842 ; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
843 ; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
844 ; OzVEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
845 ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
846 ; OzVEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
847 ; OzVEC2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
848 ; OzVEC2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
849 ; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
850 ; OzVEC2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
851 ; OzVEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
852 ; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
853 ; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
854 ; OzVEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
855 ; OzVEC2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
856 ; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
857 ; OzVEC2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
858 ; OzVEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
859 ; OzVEC2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
860 ; OzVEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
861 ; OzVEC2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
862 ; OzVEC2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
863 ; OzVEC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
864 ; OzVEC2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
865 ; OzVEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
866 ; OzVEC2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
867 ; OzVEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
868 ; OzVEC2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
869 ; OzVEC2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
870 ; OzVEC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
871 ; OzVEC2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
872 ; OzVEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
873 ; OzVEC2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
874 ; OzVEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
875 ; OzVEC2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
876 ; OzVEC2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
877 ; OzVEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
878 ; OzVEC2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
879 ; OzVEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
880 ; OzVEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
881 ; OzVEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
882 ; OzVEC2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
883 ; OzVEC2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
884 ; OzVEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
885 ; OzVEC2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
886 ; OzVEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
887 ; OzVEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
888 ; OzVEC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
889 ; OzVEC2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
890 ; OzVEC2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
891 ; OzVEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
892 ; OzVEC2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
893 ; OzVEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
894 ; OzVEC2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
895 ; OzVEC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
896 ; OzVEC2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
897 ; OzVEC2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
898 ; OzVEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
899 ; OzVEC2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
900 ; OzVEC2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
901 ; OzVEC2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
902 ; OzVEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
903 ; OzVEC2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
904 ; OzVEC2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
905 ; OzVEC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
906 ; OzVEC2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
907 ; OzVEC2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
908 ; OzVEC2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
909 ; OzVEC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
910 ; OzVEC2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
911 ; OzVEC2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
912 ; OzVEC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
913 ; OzVEC2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
914 ; OzVEC2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
915 ; OzVEC2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
916 ; OzVEC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
917 ; OzVEC2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
918 ; OzVEC2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
919 ; OzVEC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
920 ; OzVEC2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
921 ; OzVEC2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
922 ; OzVEC2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
923 ; OzVEC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
924 ; OzVEC2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
925 ; OzVEC2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
926 ; OzVEC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
927 ; OzVEC2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
928 ; OzVEC2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
929 ; OzVEC2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
930 ; OzVEC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
931 ; OzVEC2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
932 ; OzVEC2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
933 ; OzVEC2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
934 ; OzVEC2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
935 ; OzVEC2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
936 ; OzVEC2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
937 ; OzVEC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
938 ; OzVEC2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
939 ; OzVEC2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
940 ; OzVEC2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
941 ; OzVEC2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
942 ; OzVEC2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
943 ; OzVEC2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
944 ; OzVEC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
945 ; OzVEC2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
946 ; OzVEC2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
947 ; OzVEC2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
948 ; OzVEC2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
949 ; OzVEC2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
950 ; OzVEC2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
951 ; OzVEC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
952 ; OzVEC2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
953 ; OzVEC2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
954 ; OzVEC2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
955 ; OzVEC2-NEXT: ret i32 [[TMP78]]
957 ; O3DIS-LABEL: @enabled(
959 ; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
960 ; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
961 ; O3DIS-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
962 ; O3DIS-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
963 ; O3DIS-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
964 ; O3DIS-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
965 ; O3DIS-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
966 ; O3DIS-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
967 ; O3DIS-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
968 ; O3DIS-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
969 ; O3DIS-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
970 ; O3DIS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
971 ; O3DIS-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
972 ; O3DIS-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
973 ; O3DIS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
974 ; O3DIS-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
975 ; O3DIS-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
976 ; O3DIS-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
977 ; O3DIS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
978 ; O3DIS-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
979 ; O3DIS-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
980 ; O3DIS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
981 ; O3DIS-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
982 ; O3DIS-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
983 ; O3DIS-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
984 ; O3DIS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
985 ; O3DIS-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
986 ; O3DIS-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
987 ; O3DIS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
988 ; O3DIS-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
989 ; O3DIS-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
990 ; O3DIS-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
991 ; O3DIS-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
992 ; O3DIS-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
993 ; O3DIS-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
994 ; O3DIS-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
995 ; O3DIS-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
996 ; O3DIS-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
997 ; O3DIS-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
998 ; O3DIS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
999 ; O3DIS-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
1000 ; O3DIS-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
1001 ; O3DIS-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
1002 ; O3DIS-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
1003 ; O3DIS-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
1004 ; O3DIS-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1005 ; O3DIS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
1006 ; O3DIS-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
1007 ; O3DIS-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
1008 ; O3DIS-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
1009 ; O3DIS-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
1010 ; O3DIS-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
1011 ; O3DIS-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1012 ; O3DIS-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
1013 ; O3DIS-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
1014 ; O3DIS-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
1015 ; O3DIS-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
1016 ; O3DIS-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
1017 ; O3DIS-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
1018 ; O3DIS-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
1019 ; O3DIS-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
1020 ; O3DIS-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
1021 ; O3DIS-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
1022 ; O3DIS-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
1023 ; O3DIS-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
1024 ; O3DIS-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
1025 ; O3DIS-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
1026 ; O3DIS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
1027 ; O3DIS-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
1028 ; O3DIS-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
1029 ; O3DIS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
1030 ; O3DIS-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
1031 ; O3DIS-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
1032 ; O3DIS-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
1033 ; O3DIS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
1034 ; O3DIS-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
1035 ; O3DIS-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
1036 ; O3DIS-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
1037 ; O3DIS-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
1038 ; O3DIS-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
1039 ; O3DIS-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
1040 ; O3DIS-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
1041 ; O3DIS-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
1042 ; O3DIS-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
1043 ; O3DIS-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
1044 ; O3DIS-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
1045 ; O3DIS-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
1046 ; O3DIS-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
1047 ; O3DIS-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
1048 ; O3DIS-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
1049 ; O3DIS-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
1050 ; O3DIS-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
1051 ; O3DIS-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
1052 ; O3DIS-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
1053 ; O3DIS-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
1054 ; O3DIS-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
1055 ; O3DIS-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
1056 ; O3DIS-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
1057 ; O3DIS-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
1058 ; O3DIS-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1059 ; O3DIS-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
1060 ; O3DIS-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
1061 ; O3DIS-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
1062 ; O3DIS-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
1063 ; O3DIS-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
1064 ; O3DIS-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
1065 ; O3DIS-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
1066 ; O3DIS-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
1067 ; O3DIS-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
1068 ; O3DIS-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
1069 ; O3DIS-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
1070 ; O3DIS-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
1071 ; O3DIS-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
1072 ; O3DIS-NEXT: ret i32 [[TMP78]]
1077 for.body: ; preds = %for.body, %entry
1078 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1079 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
1080 %0 = load i32, i32* %arrayidx, align 4
1081 %add = add nsw i32 %0, %N
1082 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
1083 store i32 %add, i32* %arrayidx2, align 4
1084 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1085 %exitcond = icmp eq i64 %indvars.iv.next, 64
1086 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
1088 for.end: ; preds = %for.body
1089 %1 = load i32, i32* %a, align 4
1093 define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
1094 ; O1-LABEL: @nopragma(
1096 ; O1-NEXT: br label [[FOR_BODY:%.*]]
1098 ; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1099 ; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1100 ; O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1101 ; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1102 ; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1103 ; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1104 ; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1105 ; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1106 ; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1108 ; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1109 ; O1-NEXT: ret i32 [[TMP1]]
1111 ; O2-LABEL: @nopragma(
1113 ; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1114 ; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1115 ; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
1116 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1117 ; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1118 ; O2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
1119 ; O2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
1120 ; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
1121 ; O2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
1122 ; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
1123 ; O2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1124 ; O2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
1125 ; O2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
1126 ; O2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
1127 ; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
1128 ; O2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
1129 ; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
1130 ; O2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1131 ; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
1132 ; O2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
1133 ; O2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
1134 ; O2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
1135 ; O2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
1136 ; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
1137 ; O2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1138 ; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
1139 ; O2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
1140 ; O2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
1141 ; O2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
1142 ; O2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
1143 ; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
1144 ; O2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1145 ; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
1146 ; O2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
1147 ; O2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
1148 ; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
1149 ; O2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
1150 ; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
1151 ; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1152 ; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
1153 ; O2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
1154 ; O2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
1155 ; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
1156 ; O2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
1157 ; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
1158 ; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1159 ; O2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
1160 ; O2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
1161 ; O2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
1162 ; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
1163 ; O2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
1164 ; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
1165 ; O2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1166 ; O2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
1167 ; O2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
1168 ; O2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
1169 ; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
1170 ; O2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
1171 ; O2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
1172 ; O2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
1173 ; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
1174 ; O2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
1175 ; O2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
1176 ; O2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
1177 ; O2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
1178 ; O2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
1179 ; O2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
1180 ; O2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
1181 ; O2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
1182 ; O2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
1183 ; O2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
1184 ; O2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
1185 ; O2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
1186 ; O2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
1187 ; O2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
1188 ; O2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
1189 ; O2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
1190 ; O2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
1191 ; O2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
1192 ; O2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
1193 ; O2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
1194 ; O2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
1195 ; O2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
1196 ; O2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
1197 ; O2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
1198 ; O2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
1199 ; O2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
1200 ; O2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
1201 ; O2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
1202 ; O2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
1203 ; O2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
1204 ; O2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
1205 ; O2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
1206 ; O2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
1207 ; O2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
1208 ; O2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
1209 ; O2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
1210 ; O2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
1211 ; O2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
1212 ; O2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1213 ; O2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
1214 ; O2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
1215 ; O2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
1216 ; O2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
1217 ; O2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
1218 ; O2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
1219 ; O2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
1220 ; O2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
1221 ; O2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
1222 ; O2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
1223 ; O2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
1224 ; O2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
1225 ; O2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
1226 ; O2-NEXT: ret i32 [[TMP78]]
1228 ; O3-LABEL: @nopragma(
1230 ; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1231 ; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1232 ; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
1233 ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1234 ; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1235 ; O3-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
1236 ; O3-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
1237 ; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
1238 ; O3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
1239 ; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
1240 ; O3-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1241 ; O3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
1242 ; O3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
1243 ; O3-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
1244 ; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
1245 ; O3-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
1246 ; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
1247 ; O3-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1248 ; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
1249 ; O3-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
1250 ; O3-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
1251 ; O3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
1252 ; O3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
1253 ; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
1254 ; O3-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1255 ; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
1256 ; O3-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
1257 ; O3-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
1258 ; O3-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
1259 ; O3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
1260 ; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
1261 ; O3-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1262 ; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
1263 ; O3-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
1264 ; O3-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
1265 ; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
1266 ; O3-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
1267 ; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
1268 ; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1269 ; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
1270 ; O3-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
1271 ; O3-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
1272 ; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
1273 ; O3-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
1274 ; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
1275 ; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1276 ; O3-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
1277 ; O3-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
1278 ; O3-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
1279 ; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
1280 ; O3-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
1281 ; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
1282 ; O3-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1283 ; O3-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
1284 ; O3-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
1285 ; O3-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
1286 ; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
1287 ; O3-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
1288 ; O3-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
1289 ; O3-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
1290 ; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
1291 ; O3-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
1292 ; O3-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
1293 ; O3-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
1294 ; O3-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
1295 ; O3-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
1296 ; O3-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
1297 ; O3-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
1298 ; O3-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
1299 ; O3-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
1300 ; O3-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
1301 ; O3-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
1302 ; O3-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
1303 ; O3-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
1304 ; O3-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
1305 ; O3-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
1306 ; O3-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
1307 ; O3-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
1308 ; O3-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
1309 ; O3-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
1310 ; O3-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
1311 ; O3-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
1312 ; O3-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
1313 ; O3-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
1314 ; O3-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
1315 ; O3-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
1316 ; O3-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
1317 ; O3-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
1318 ; O3-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
1319 ; O3-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
1320 ; O3-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
1321 ; O3-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
1322 ; O3-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
1323 ; O3-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
1324 ; O3-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
1325 ; O3-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
1326 ; O3-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
1327 ; O3-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
1328 ; O3-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
1329 ; O3-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1330 ; O3-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
1331 ; O3-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
1332 ; O3-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
1333 ; O3-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
1334 ; O3-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
1335 ; O3-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
1336 ; O3-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
1337 ; O3-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
1338 ; O3-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
1339 ; O3-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
1340 ; O3-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
1341 ; O3-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
1342 ; O3-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
1343 ; O3-NEXT: ret i32 [[TMP78]]
1345 ; O3DEFAULT-LABEL: @nopragma(
1346 ; O3DEFAULT-NEXT: entry:
1347 ; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1348 ; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1349 ; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
1350 ; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1351 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1352 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
1353 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
1354 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
1355 ; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
1356 ; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
1357 ; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1358 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
1359 ; O3DEFAULT-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
1360 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
1361 ; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
1362 ; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
1363 ; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
1364 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1365 ; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
1366 ; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
1367 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
1368 ; O3DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
1369 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
1370 ; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
1371 ; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1372 ; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
1373 ; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
1374 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
1375 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
1376 ; O3DEFAULT-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
1377 ; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
1378 ; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1379 ; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
1380 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
1381 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
1382 ; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
1383 ; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
1384 ; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
1385 ; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1386 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
1387 ; O3DEFAULT-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
1388 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
1389 ; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
1390 ; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
1391 ; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
1392 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1393 ; O3DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
1394 ; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
1395 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
1396 ; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
1397 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
1398 ; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
1399 ; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1400 ; O3DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
1401 ; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
1402 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
1403 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
1404 ; O3DEFAULT-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
1405 ; O3DEFAULT-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
1406 ; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
1407 ; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
1408 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
1409 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
1410 ; O3DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
1411 ; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
1412 ; O3DEFAULT-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
1413 ; O3DEFAULT-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
1414 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
1415 ; O3DEFAULT-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
1416 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
1417 ; O3DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
1418 ; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
1419 ; O3DEFAULT-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
1420 ; O3DEFAULT-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
1421 ; O3DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
1422 ; O3DEFAULT-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
1423 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
1424 ; O3DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
1425 ; O3DEFAULT-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
1426 ; O3DEFAULT-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
1427 ; O3DEFAULT-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
1428 ; O3DEFAULT-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
1429 ; O3DEFAULT-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
1430 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
1431 ; O3DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
1432 ; O3DEFAULT-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
1433 ; O3DEFAULT-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
1434 ; O3DEFAULT-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
1435 ; O3DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
1436 ; O3DEFAULT-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
1437 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
1438 ; O3DEFAULT-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
1439 ; O3DEFAULT-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
1440 ; O3DEFAULT-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
1441 ; O3DEFAULT-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
1442 ; O3DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
1443 ; O3DEFAULT-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
1444 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
1445 ; O3DEFAULT-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
1446 ; O3DEFAULT-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1447 ; O3DEFAULT-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
1448 ; O3DEFAULT-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
1449 ; O3DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
1450 ; O3DEFAULT-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
1451 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
1452 ; O3DEFAULT-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
1453 ; O3DEFAULT-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
1454 ; O3DEFAULT-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
1455 ; O3DEFAULT-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
1456 ; O3DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
1457 ; O3DEFAULT-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
1458 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
1459 ; O3DEFAULT-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
1460 ; O3DEFAULT-NEXT: ret i32 [[TMP78]]
1462 ; Os-LABEL: @nopragma(
1464 ; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1465 ; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1466 ; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
1467 ; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1468 ; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1469 ; Os-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
1470 ; Os-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
1471 ; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
1472 ; Os-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
1473 ; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
1474 ; Os-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1475 ; Os-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
1476 ; Os-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
1477 ; Os-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4
1478 ; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
1479 ; Os-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
1480 ; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
1481 ; Os-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1482 ; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
1483 ; Os-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
1484 ; Os-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
1485 ; Os-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
1486 ; Os-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
1487 ; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
1488 ; Os-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1489 ; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
1490 ; Os-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
1491 ; Os-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
1492 ; Os-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
1493 ; Os-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
1494 ; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4
1495 ; Os-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1496 ; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
1497 ; Os-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
1498 ; Os-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
1499 ; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
1500 ; Os-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
1501 ; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4
1502 ; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1503 ; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
1504 ; Os-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
1505 ; Os-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4
1506 ; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
1507 ; Os-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
1508 ; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4
1509 ; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1510 ; Os-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
1511 ; Os-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>*
1512 ; Os-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4
1513 ; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
1514 ; Os-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>*
1515 ; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
1516 ; Os-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1517 ; Os-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
1518 ; Os-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>*
1519 ; Os-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4
1520 ; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
1521 ; Os-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>*
1522 ; Os-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4
1523 ; Os-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]]
1524 ; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
1525 ; Os-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>*
1526 ; Os-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4
1527 ; Os-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
1528 ; Os-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
1529 ; Os-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4
1530 ; Os-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]]
1531 ; Os-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
1532 ; Os-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>*
1533 ; Os-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4
1534 ; Os-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
1535 ; Os-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
1536 ; Os-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4
1537 ; Os-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]]
1538 ; Os-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
1539 ; Os-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>*
1540 ; Os-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4
1541 ; Os-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
1542 ; Os-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>*
1543 ; Os-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4
1544 ; Os-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]]
1545 ; Os-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
1546 ; Os-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>*
1547 ; Os-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4
1548 ; Os-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48
1549 ; Os-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>*
1550 ; Os-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4
1551 ; Os-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]]
1552 ; Os-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48
1553 ; Os-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>*
1554 ; Os-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4
1555 ; Os-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52
1556 ; Os-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>*
1557 ; Os-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4
1558 ; Os-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]]
1559 ; Os-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52
1560 ; Os-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>*
1561 ; Os-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4
1562 ; Os-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56
1563 ; Os-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>*
1564 ; Os-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4
1565 ; Os-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]]
1566 ; Os-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56
1567 ; Os-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>*
1568 ; Os-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4
1569 ; Os-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60
1570 ; Os-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>*
1571 ; Os-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4
1572 ; Os-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]]
1573 ; Os-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60
1574 ; Os-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>*
1575 ; Os-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4
1576 ; Os-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4
1577 ; Os-NEXT: ret i32 [[TMP78]]
1579 ; Oz-LABEL: @nopragma(
1581 ; Oz-NEXT: br label [[FOR_BODY:%.*]]
1583 ; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1584 ; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1585 ; Oz-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1586 ; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1587 ; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1588 ; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1589 ; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1590 ; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1591 ; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1593 ; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1594 ; Oz-NEXT: ret i32 [[TMP1]]
1596 ; O1VEC2-LABEL: @nopragma(
1597 ; O1VEC2-NEXT: entry:
1598 ; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1599 ; O1VEC2: vector.ph:
1600 ; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1601 ; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1602 ; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]]
1603 ; O1VEC2: vector.body:
1604 ; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1605 ; O1VEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1606 ; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
1607 ; O1VEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
1608 ; O1VEC2-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
1609 ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
1610 ; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1611 ; O1VEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
1612 ; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
1613 ; O1VEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
1614 ; O1VEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4
1615 ; O1VEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1616 ; O1VEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
1617 ; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1618 ; O1VEC2: middle.block:
1619 ; O1VEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64
1620 ; O1VEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1621 ; O1VEC2: scalar.ph:
1622 ; O1VEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1623 ; O1VEC2-NEXT: br label [[FOR_BODY:%.*]]
1625 ; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1626 ; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
1627 ; O1VEC2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1628 ; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[N]]
1629 ; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
1630 ; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1631 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1632 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1633 ; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
1635 ; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4
1636 ; O1VEC2-NEXT: ret i32 [[TMP10]]
1638 ; OzVEC2-LABEL: @nopragma(
1639 ; OzVEC2-NEXT: entry:
1640 ; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1641 ; OzVEC2: vector.ph:
1642 ; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1643 ; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1644 ; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]]
1645 ; OzVEC2: vector.body:
1646 ; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1647 ; OzVEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1648 ; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
1649 ; OzVEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
1650 ; OzVEC2-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
1651 ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
1652 ; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1653 ; OzVEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
1654 ; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
1655 ; OzVEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
1656 ; OzVEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4
1657 ; OzVEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1658 ; OzVEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
1659 ; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1660 ; OzVEC2: middle.block:
1661 ; OzVEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64
1662 ; OzVEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1663 ; OzVEC2: scalar.ph:
1664 ; OzVEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1665 ; OzVEC2-NEXT: br label [[FOR_BODY:%.*]]
1667 ; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1668 ; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
1669 ; OzVEC2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1670 ; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[N]]
1671 ; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
1672 ; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1673 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1674 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1675 ; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
1677 ; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4
1678 ; OzVEC2-NEXT: ret i32 [[TMP10]]
1680 ; O3DIS-LABEL: @nopragma(
1681 ; O3DIS-NEXT: entry:
1682 ; O3DIS-NEXT: br label [[FOR_BODY:%.*]]
1684 ; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1685 ; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1686 ; O3DIS-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1687 ; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1688 ; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1689 ; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1690 ; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1691 ; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1692 ; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1694 ; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1695 ; O3DIS-NEXT: ret i32 [[TMP1]]
1700 for.body: ; preds = %for.body, %entry
1701 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1702 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
1703 %0 = load i32, i32* %arrayidx, align 4
1704 %add = add nsw i32 %0, %N
1705 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
1706 store i32 %add, i32* %arrayidx2, align 4
1707 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1708 %exitcond = icmp eq i64 %indvars.iv.next, 64
1709 br i1 %exitcond, label %for.end, label %for.body
1711 for.end: ; preds = %for.body
1712 %1 = load i32, i32* %a, align 4
; @disabled: computes a[i] = b[i] + N in a loop whose exit compare is against
; 48; the back-edge branch carries !llvm.loop !2. After the loop a[0] is
; reloaded, and the assertions expect that value to be returned.
; NOTE(review): the definition of !2 is not visible in this excerpt; presumably
; it wraps !3 (llvm.loop.vectorize.enable set to false) -- confirm.
;
; In the assertions below, every check prefix except O3DEFAULT expects the
; scalar loop to remain. The O3DEFAULT run expects no loop at all: twelve
; <4 x i32> load/add/store groups at element offsets 0, 4, ..., 44, all adding
; the same splat of N.
; NOTE(review): that straight-line output presumably comes from full unrolling
; followed by SLP vectorization rather than from the loop vectorizer -- confirm.
1716 define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
1717 ; O1-LABEL: @disabled(
1719 ; O1-NEXT: br label [[FOR_BODY:%.*]]
1721 ; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1722 ; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1723 ; O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1724 ; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1725 ; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1726 ; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1727 ; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1728 ; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1729 ; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1731 ; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1732 ; O1-NEXT: ret i32 [[TMP1]]
1734 ; O2-LABEL: @disabled(
1736 ; O2-NEXT: br label [[FOR_BODY:%.*]]
1738 ; O2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1739 ; O2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1740 ; O2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1741 ; O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1742 ; O2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1743 ; O2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1744 ; O2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1745 ; O2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1746 ; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1748 ; O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1749 ; O2-NEXT: ret i32 [[TMP1]]
1751 ; O3-LABEL: @disabled(
1753 ; O3-NEXT: br label [[FOR_BODY:%.*]]
1755 ; O3-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1756 ; O3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1757 ; O3-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1758 ; O3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1759 ; O3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1760 ; O3-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1761 ; O3-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1762 ; O3-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1763 ; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1765 ; O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1766 ; O3-NEXT: ret i32 [[TMP1]]
1768 ; O3DEFAULT-LABEL: @disabled(
1769 ; O3DEFAULT-NEXT: entry:
1770 ; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>*
1771 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
1772 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1773 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
1774 ; O3DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP3]]
1775 ; O3DEFAULT-NEXT: [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
1776 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
1777 ; O3DEFAULT-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
1778 ; O3DEFAULT-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
1779 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX_4]] to <4 x i32>*
1780 ; O3DEFAULT-NEXT: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
1781 ; O3DEFAULT-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP3]]
1782 ; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_4]] to <4 x i32>*
1783 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
1784 ; O3DEFAULT-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
1785 ; O3DEFAULT-NEXT: [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
1786 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = bitcast i32* [[ARRAYIDX_8]] to <4 x i32>*
1787 ; O3DEFAULT-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
1788 ; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP11]], [[TMP3]]
1789 ; O3DEFAULT-NEXT: [[TMP13:%.*]] = bitcast i32* [[ARRAYIDX2_8]] to <4 x i32>*
1790 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4
1791 ; O3DEFAULT-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
1792 ; O3DEFAULT-NEXT: [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
1793 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX_12]] to <4 x i32>*
1794 ; O3DEFAULT-NEXT: [[TMP15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
1795 ; O3DEFAULT-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP15]], [[TMP3]]
1796 ; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX2_12]] to <4 x i32>*
1797 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4
1798 ; O3DEFAULT-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16
1799 ; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16
1800 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX_16]] to <4 x i32>*
1801 ; O3DEFAULT-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4
1802 ; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP19]], [[TMP3]]
1803 ; O3DEFAULT-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX2_16]] to <4 x i32>*
1804 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP21]], align 4
1805 ; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20
1806 ; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20
1807 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX_20]] to <4 x i32>*
1808 ; O3DEFAULT-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4
1809 ; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP3]]
1810 ; O3DEFAULT-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX2_20]] to <4 x i32>*
1811 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP25]], align 4
1812 ; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24
1813 ; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24
1814 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARRAYIDX_24]] to <4 x i32>*
1815 ; O3DEFAULT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32>* [[TMP26]], align 4
1816 ; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP27]], [[TMP3]]
1817 ; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[ARRAYIDX2_24]] to <4 x i32>*
1818 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 4
1819 ; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28
1820 ; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28
1821 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = bitcast i32* [[ARRAYIDX_28]] to <4 x i32>*
1822 ; O3DEFAULT-NEXT: [[TMP31:%.*]] = load <4 x i32>, <4 x i32>* [[TMP30]], align 4
1823 ; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP31]], [[TMP3]]
1824 ; O3DEFAULT-NEXT: [[TMP33:%.*]] = bitcast i32* [[ARRAYIDX2_28]] to <4 x i32>*
1825 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP32]], <4 x i32>* [[TMP33]], align 4
1826 ; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32
1827 ; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32
1828 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[ARRAYIDX_32]] to <4 x i32>*
1829 ; O3DEFAULT-NEXT: [[TMP35:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4
1830 ; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP35]], [[TMP3]]
1831 ; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[ARRAYIDX2_32]] to <4 x i32>*
1832 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], <4 x i32>* [[TMP37]], align 4
1833 ; O3DEFAULT-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36
1834 ; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36
1835 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = bitcast i32* [[ARRAYIDX_36]] to <4 x i32>*
1836 ; O3DEFAULT-NEXT: [[TMP39:%.*]] = load <4 x i32>, <4 x i32>* [[TMP38]], align 4
1837 ; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP3]]
1838 ; O3DEFAULT-NEXT: [[TMP41:%.*]] = bitcast i32* [[ARRAYIDX2_36]] to <4 x i32>*
1839 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP41]], align 4
1840 ; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40
1841 ; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40
1842 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[ARRAYIDX_40]] to <4 x i32>*
1843 ; O3DEFAULT-NEXT: [[TMP43:%.*]] = load <4 x i32>, <4 x i32>* [[TMP42]], align 4
1844 ; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP3]]
1845 ; O3DEFAULT-NEXT: [[TMP45:%.*]] = bitcast i32* [[ARRAYIDX2_40]] to <4 x i32>*
1846 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP45]], align 4
1847 ; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44
1848 ; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44
1849 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = bitcast i32* [[ARRAYIDX_44]] to <4 x i32>*
1850 ; O3DEFAULT-NEXT: [[TMP47:%.*]] = load <4 x i32>, <4 x i32>* [[TMP46]], align 4
1851 ; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP47]], [[TMP3]]
1852 ; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>*
1853 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP49]], align 4
1854 ; O3DEFAULT-NEXT: [[TMP50:%.*]] = load i32, i32* [[A]], align 4
1855 ; O3DEFAULT-NEXT: ret i32 [[TMP50]]
1857 ; Os-LABEL: @disabled(
1859 ; Os-NEXT: br label [[FOR_BODY:%.*]]
1861 ; Os-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1862 ; Os-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1863 ; Os-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1864 ; Os-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1865 ; Os-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1866 ; Os-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1867 ; Os-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1868 ; Os-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1869 ; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1871 ; Os-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1872 ; Os-NEXT: ret i32 [[TMP1]]
1874 ; Oz-LABEL: @disabled(
1876 ; Oz-NEXT: br label [[FOR_BODY:%.*]]
1878 ; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1879 ; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1880 ; Oz-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1881 ; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1882 ; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1883 ; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1884 ; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1885 ; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1886 ; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1888 ; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1889 ; Oz-NEXT: ret i32 [[TMP1]]
1891 ; O1VEC2-LABEL: @disabled(
1892 ; O1VEC2-NEXT: entry:
1893 ; O1VEC2-NEXT: br label [[FOR_BODY:%.*]]
1895 ; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1896 ; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1897 ; O1VEC2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1898 ; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1899 ; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1900 ; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1901 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1902 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1903 ; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
1905 ; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1906 ; O1VEC2-NEXT: ret i32 [[TMP1]]
1908 ; OzVEC2-LABEL: @disabled(
1909 ; OzVEC2-NEXT: entry:
1910 ; OzVEC2-NEXT: br label [[FOR_BODY:%.*]]
1912 ; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1913 ; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1914 ; OzVEC2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1915 ; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1916 ; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1917 ; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1918 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1919 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1920 ; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
1922 ; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1923 ; OzVEC2-NEXT: ret i32 [[TMP1]]
1925 ; O3DIS-LABEL: @disabled(
1926 ; O3DIS-NEXT: entry:
1927 ; O3DIS-NEXT: br label [[FOR_BODY:%.*]]
1929 ; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1930 ; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]]
1931 ; O3DIS-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
1932 ; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1933 ; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
1934 ; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
1935 ; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1936 ; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1937 ; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1939 ; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
1940 ; O3DIS-NEXT: ret i32 [[TMP1]]
; Input loop body: load b[i], add N, store the sum to a[i], then increment i
; and leave the loop once the incremented index equals 48.
1945 for.body: ; preds = %for.body, %entry
1946 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1947 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
1948 %0 = load i32, i32* %arrayidx, align 4
1949 %add = add nsw i32 %0, %N
1950 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
1951 store i32 %add, i32* %arrayidx2, align 4
1952 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1953 %exitcond = icmp eq i64 %indvars.iv.next, 48
1954 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
1956 for.end: ; preds = %for.body
1957 %1 = load i32, i32* %a, align 4
; Loop-hint operand: sets llvm.loop.vectorize.enable to true (i1 1).
; NOTE(review): the loop-ID node that references this operand is not visible
; in this excerpt -- confirm which function's loop uses it.
1962 !1 = !{!"llvm.loop.vectorize.enable", i1 1}
; Loop-hint operand: sets llvm.loop.vectorize.enable to false (i1 0).
; NOTE(review): presumably reached through !2, the loop ID attached to the
; back-edge branch in @disabled -- confirm against the definition of !2.
1964 !3 = !{!"llvm.loop.vectorize.enable", i1 0}