1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mcpu=corei7 -passes="default<O1>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
3 ; RUN: opt < %s -mcpu=corei7 -passes="default<O2>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
4 ; RUN: opt < %s -mcpu=corei7 -passes="default<O3>" -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
5 ; RUN: opt < %s -mcpu=corei7 -passes="default<O3>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT
6 ; RUN: opt < %s -mcpu=corei7 -passes="default<Os>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
7 ; RUN: opt < %s -mcpu=corei7 -passes="default<Oz>" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
8 ; RUN: opt < %s -mcpu=corei7 -passes="default<O1>,loop-vectorize" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
9 ; RUN: opt < %s -mcpu=corei7 -passes="default<Oz>,loop-vectorize" -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
10 ; RUN: opt < %s -mcpu=corei7 -passes="default<O3>" -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
12 ; This file tests that the llvm.loop.vectorize.enable metadata forces
13 ; vectorization even when the optimization level is too low, or when
14 ; vectorization is disabled (the O3DIS run passes -vectorize-loops=false).
; Module-level target description: an explicit data layout string and an
; x86-64 Linux triple. Every opt invocation in the run lines at the top of this
; file additionally selects -mcpu=corei7.
16 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
17 target triple = "x86_64-unknown-linux-gnu"
19 define i32 @enabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %N) {
22 ; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
23 ; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
24 ; O1-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
25 ; O1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
26 ; O1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
27 ; O1-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
28 ; O1-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
29 ; O1-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
30 ; O1-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
31 ; O1-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
32 ; O1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
33 ; O1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
34 ; O1-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
35 ; O1-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
36 ; O1-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
37 ; O1-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
38 ; O1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
39 ; O1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
40 ; O1-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
41 ; O1-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
42 ; O1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
43 ; O1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
44 ; O1-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
45 ; O1-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
46 ; O1-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
47 ; O1-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
48 ; O1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
49 ; O1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
50 ; O1-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
51 ; O1-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
52 ; O1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
53 ; O1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
54 ; O1-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
55 ; O1-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
56 ; O1-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
57 ; O1-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
58 ; O1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
59 ; O1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
60 ; O1-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
61 ; O1-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
62 ; O1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
63 ; O1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
64 ; O1-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
65 ; O1-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
66 ; O1-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
67 ; O1-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
68 ; O1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
69 ; O1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
70 ; O1-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
71 ; O1-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
72 ; O1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
73 ; O1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
74 ; O1-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
75 ; O1-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
76 ; O1-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
77 ; O1-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
78 ; O1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
79 ; O1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
80 ; O1-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
81 ; O1-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
82 ; O1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
83 ; O1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
84 ; O1-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
85 ; O1-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
86 ; O1-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
87 ; O1-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
88 ; O1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
89 ; O1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
90 ; O1-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
91 ; O1-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
92 ; O1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
93 ; O1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
94 ; O1-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
95 ; O1-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
96 ; O1-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
97 ; O1-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
98 ; O1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
99 ; O1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
100 ; O1-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
101 ; O1-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
102 ; O1-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
103 ; O1-NEXT: ret i32 [[TMP46]]
105 ; O2-LABEL: @enabled(
107 ; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
108 ; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
109 ; O2-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
110 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
111 ; O2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
112 ; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
113 ; O2-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
114 ; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
115 ; O2-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
116 ; O2-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
117 ; O2-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
118 ; O2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
119 ; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
120 ; O2-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
121 ; O2-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
122 ; O2-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
123 ; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
124 ; O2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
125 ; O2-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
126 ; O2-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
127 ; O2-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
128 ; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
129 ; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
130 ; O2-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
131 ; O2-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
132 ; O2-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
133 ; O2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
134 ; O2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
135 ; O2-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
136 ; O2-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
137 ; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
138 ; O2-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
139 ; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
140 ; O2-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
141 ; O2-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
142 ; O2-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
143 ; O2-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
144 ; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
145 ; O2-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
146 ; O2-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
147 ; O2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
148 ; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
149 ; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
150 ; O2-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
151 ; O2-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
152 ; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
153 ; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
154 ; O2-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
155 ; O2-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
156 ; O2-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
157 ; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
158 ; O2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
159 ; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
160 ; O2-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
161 ; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
162 ; O2-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
163 ; O2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
164 ; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
165 ; O2-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
166 ; O2-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
167 ; O2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
168 ; O2-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
169 ; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
170 ; O2-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
171 ; O2-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
172 ; O2-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
173 ; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
174 ; O2-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
175 ; O2-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
176 ; O2-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
177 ; O2-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
178 ; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
179 ; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
180 ; O2-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
181 ; O2-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
182 ; O2-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
183 ; O2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
184 ; O2-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
185 ; O2-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
186 ; O2-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
187 ; O2-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
188 ; O2-NEXT: ret i32 [[TMP46]]
190 ; O3-LABEL: @enabled(
192 ; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
193 ; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
194 ; O3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
195 ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
196 ; O3-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
197 ; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
198 ; O3-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
199 ; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
200 ; O3-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
201 ; O3-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
202 ; O3-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
203 ; O3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
204 ; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
205 ; O3-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
206 ; O3-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
207 ; O3-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
208 ; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
209 ; O3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
210 ; O3-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
211 ; O3-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
212 ; O3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
213 ; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
214 ; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
215 ; O3-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
216 ; O3-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
217 ; O3-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
218 ; O3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
219 ; O3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
220 ; O3-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
221 ; O3-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
222 ; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
223 ; O3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
224 ; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
225 ; O3-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
226 ; O3-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
227 ; O3-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
228 ; O3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
229 ; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
230 ; O3-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
231 ; O3-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
232 ; O3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
233 ; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
234 ; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
235 ; O3-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
236 ; O3-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
237 ; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
238 ; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
239 ; O3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
240 ; O3-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
241 ; O3-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
242 ; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
243 ; O3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
244 ; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
245 ; O3-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
246 ; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
247 ; O3-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
248 ; O3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
249 ; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
250 ; O3-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
251 ; O3-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
252 ; O3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
253 ; O3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
254 ; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
255 ; O3-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
256 ; O3-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
257 ; O3-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
258 ; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
259 ; O3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
260 ; O3-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
261 ; O3-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
262 ; O3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
263 ; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
264 ; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
265 ; O3-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
266 ; O3-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
267 ; O3-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
268 ; O3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
269 ; O3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
270 ; O3-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
271 ; O3-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
272 ; O3-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
273 ; O3-NEXT: ret i32 [[TMP46]]
275 ; O3DEFAULT-LABEL: @enabled(
276 ; O3DEFAULT-NEXT: entry:
277 ; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
278 ; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
279 ; O3DEFAULT-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
280 ; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
281 ; O3DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
282 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
283 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
284 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
285 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
286 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
287 ; O3DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
288 ; O3DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
289 ; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
290 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
291 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
292 ; O3DEFAULT-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
293 ; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
294 ; O3DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
295 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
296 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
297 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
298 ; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
299 ; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
300 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
301 ; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
302 ; O3DEFAULT-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
303 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
304 ; O3DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
305 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
306 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
307 ; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
308 ; O3DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
309 ; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
310 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
311 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
312 ; O3DEFAULT-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
313 ; O3DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
314 ; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
315 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
316 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
317 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
318 ; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
319 ; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
320 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
321 ; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
322 ; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
323 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
324 ; O3DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
325 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
326 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
327 ; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
328 ; O3DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
329 ; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
330 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
331 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
332 ; O3DEFAULT-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
333 ; O3DEFAULT-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
334 ; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
335 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
336 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
337 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
338 ; O3DEFAULT-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
339 ; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
340 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
341 ; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
342 ; O3DEFAULT-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
343 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
344 ; O3DEFAULT-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
345 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
346 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
347 ; O3DEFAULT-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
348 ; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
349 ; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
350 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
351 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
352 ; O3DEFAULT-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
353 ; O3DEFAULT-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
354 ; O3DEFAULT-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
355 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
356 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
357 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
358 ; O3DEFAULT-NEXT: ret i32 [[TMP46]]
360 ; Os-LABEL: @enabled(
362 ; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
363 ; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
364 ; Os-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
365 ; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
366 ; Os-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
367 ; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
368 ; Os-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
369 ; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
370 ; Os-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
371 ; Os-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
372 ; Os-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
373 ; Os-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
374 ; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
375 ; Os-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
376 ; Os-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
377 ; Os-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
378 ; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
379 ; Os-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
380 ; Os-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
381 ; Os-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
382 ; Os-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
383 ; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
384 ; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
385 ; Os-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
386 ; Os-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
387 ; Os-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
388 ; Os-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
389 ; Os-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
390 ; Os-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
391 ; Os-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
392 ; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
393 ; Os-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
394 ; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
395 ; Os-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
396 ; Os-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
397 ; Os-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
398 ; Os-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
399 ; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
400 ; Os-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
401 ; Os-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
402 ; Os-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
403 ; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
404 ; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
405 ; Os-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
406 ; Os-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
407 ; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
408 ; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
409 ; Os-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
410 ; Os-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
411 ; Os-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
412 ; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
413 ; Os-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
414 ; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
415 ; Os-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
416 ; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
417 ; Os-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
418 ; Os-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
419 ; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
420 ; Os-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
421 ; Os-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
422 ; Os-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
423 ; Os-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
424 ; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
425 ; Os-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
426 ; Os-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
427 ; Os-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
428 ; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
429 ; Os-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
430 ; Os-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
431 ; Os-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
432 ; Os-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
433 ; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
434 ; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
435 ; Os-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
436 ; Os-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
437 ; Os-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
438 ; Os-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
439 ; Os-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
440 ; Os-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
441 ; Os-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
442 ; Os-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
443 ; Os-NEXT: ret i32 [[TMP46]]
445 ; Oz-LABEL: @enabled(
447 ; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
448 ; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
449 ; Oz-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
450 ; Oz-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
451 ; Oz-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
452 ; Oz-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
453 ; Oz-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
454 ; Oz-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
455 ; Oz-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
456 ; Oz-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
457 ; Oz-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
458 ; Oz-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
459 ; Oz-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
460 ; Oz-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
461 ; Oz-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
462 ; Oz-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
463 ; Oz-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
464 ; Oz-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
465 ; Oz-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
466 ; Oz-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
467 ; Oz-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
468 ; Oz-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
469 ; Oz-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
470 ; Oz-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
471 ; Oz-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
472 ; Oz-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
473 ; Oz-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
474 ; Oz-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
475 ; Oz-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
476 ; Oz-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
477 ; Oz-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
478 ; Oz-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
479 ; Oz-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
480 ; Oz-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
481 ; Oz-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
482 ; Oz-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
483 ; Oz-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
484 ; Oz-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
485 ; Oz-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
486 ; Oz-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
487 ; Oz-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
488 ; Oz-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
489 ; Oz-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
490 ; Oz-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
491 ; Oz-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
492 ; Oz-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
493 ; Oz-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
494 ; Oz-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
495 ; Oz-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
496 ; Oz-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
497 ; Oz-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
498 ; Oz-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
499 ; Oz-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
500 ; Oz-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
501 ; Oz-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
502 ; Oz-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
503 ; Oz-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
504 ; Oz-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
505 ; Oz-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
506 ; Oz-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
507 ; Oz-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
508 ; Oz-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
509 ; Oz-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
510 ; Oz-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
511 ; Oz-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
512 ; Oz-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
513 ; Oz-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
514 ; Oz-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
515 ; Oz-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
516 ; Oz-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
517 ; Oz-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
518 ; Oz-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
519 ; Oz-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
520 ; Oz-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
521 ; Oz-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
522 ; Oz-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
523 ; Oz-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
524 ; Oz-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
525 ; Oz-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
526 ; Oz-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
527 ; Oz-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
528 ; Oz-NEXT: ret i32 [[TMP46]]
530 ; O1VEC2-LABEL: @enabled(
531 ; O1VEC2-NEXT: entry:
532 ; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
533 ; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
534 ; O1VEC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
535 ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
536 ; O1VEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
537 ; O1VEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
538 ; O1VEC2-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
539 ; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
540 ; O1VEC2-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
541 ; O1VEC2-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
542 ; O1VEC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
543 ; O1VEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
544 ; O1VEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
545 ; O1VEC2-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
546 ; O1VEC2-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
547 ; O1VEC2-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
548 ; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
549 ; O1VEC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
550 ; O1VEC2-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
551 ; O1VEC2-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
552 ; O1VEC2-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
553 ; O1VEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
554 ; O1VEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
555 ; O1VEC2-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
556 ; O1VEC2-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
557 ; O1VEC2-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
558 ; O1VEC2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
559 ; O1VEC2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
560 ; O1VEC2-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
561 ; O1VEC2-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
562 ; O1VEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
563 ; O1VEC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
564 ; O1VEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
565 ; O1VEC2-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
566 ; O1VEC2-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
567 ; O1VEC2-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
568 ; O1VEC2-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
569 ; O1VEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
570 ; O1VEC2-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
571 ; O1VEC2-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
572 ; O1VEC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
573 ; O1VEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
574 ; O1VEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
575 ; O1VEC2-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
576 ; O1VEC2-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
577 ; O1VEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
578 ; O1VEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
579 ; O1VEC2-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
580 ; O1VEC2-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
581 ; O1VEC2-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
582 ; O1VEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
583 ; O1VEC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
584 ; O1VEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
585 ; O1VEC2-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
586 ; O1VEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
587 ; O1VEC2-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
588 ; O1VEC2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
589 ; O1VEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
590 ; O1VEC2-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
591 ; O1VEC2-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
592 ; O1VEC2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
593 ; O1VEC2-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
594 ; O1VEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
595 ; O1VEC2-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
596 ; O1VEC2-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
597 ; O1VEC2-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
598 ; O1VEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
599 ; O1VEC2-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
600 ; O1VEC2-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
601 ; O1VEC2-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
602 ; O1VEC2-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
603 ; O1VEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
604 ; O1VEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
605 ; O1VEC2-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
606 ; O1VEC2-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
607 ; O1VEC2-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
608 ; O1VEC2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
609 ; O1VEC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
610 ; O1VEC2-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
611 ; O1VEC2-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
612 ; O1VEC2-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
613 ; O1VEC2-NEXT: ret i32 [[TMP46]]
615 ; OzVEC2-LABEL: @enabled(
616 ; OzVEC2-NEXT: entry:
617 ; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
618 ; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
619 ; OzVEC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
620 ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
621 ; OzVEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
622 ; OzVEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
623 ; OzVEC2-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
624 ; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
625 ; OzVEC2-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
626 ; OzVEC2-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
627 ; OzVEC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
628 ; OzVEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
629 ; OzVEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
630 ; OzVEC2-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
631 ; OzVEC2-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
632 ; OzVEC2-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
633 ; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
634 ; OzVEC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
635 ; OzVEC2-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
636 ; OzVEC2-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
637 ; OzVEC2-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
638 ; OzVEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
639 ; OzVEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
640 ; OzVEC2-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
641 ; OzVEC2-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
642 ; OzVEC2-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
643 ; OzVEC2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
644 ; OzVEC2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
645 ; OzVEC2-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
646 ; OzVEC2-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
647 ; OzVEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
648 ; OzVEC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
649 ; OzVEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
650 ; OzVEC2-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
651 ; OzVEC2-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
652 ; OzVEC2-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
653 ; OzVEC2-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
654 ; OzVEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
655 ; OzVEC2-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
656 ; OzVEC2-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
657 ; OzVEC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
658 ; OzVEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
659 ; OzVEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
660 ; OzVEC2-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
661 ; OzVEC2-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
662 ; OzVEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
663 ; OzVEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
664 ; OzVEC2-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
665 ; OzVEC2-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
666 ; OzVEC2-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
667 ; OzVEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
668 ; OzVEC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
669 ; OzVEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
670 ; OzVEC2-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
671 ; OzVEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
672 ; OzVEC2-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
673 ; OzVEC2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
674 ; OzVEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
675 ; OzVEC2-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
676 ; OzVEC2-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
677 ; OzVEC2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
678 ; OzVEC2-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
679 ; OzVEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
680 ; OzVEC2-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
681 ; OzVEC2-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
682 ; OzVEC2-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
683 ; OzVEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
684 ; OzVEC2-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
685 ; OzVEC2-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
686 ; OzVEC2-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
687 ; OzVEC2-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
688 ; OzVEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
689 ; OzVEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
690 ; OzVEC2-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
691 ; OzVEC2-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
692 ; OzVEC2-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
693 ; OzVEC2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
694 ; OzVEC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
695 ; OzVEC2-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
696 ; OzVEC2-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
697 ; OzVEC2-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
698 ; OzVEC2-NEXT: ret i32 [[TMP46]]
700 ; O3DIS-LABEL: @enabled(
702 ; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
703 ; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
704 ; O3DIS-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
705 ; O3DIS-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
706 ; O3DIS-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
707 ; O3DIS-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
708 ; O3DIS-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
709 ; O3DIS-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
710 ; O3DIS-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
711 ; O3DIS-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
712 ; O3DIS-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
713 ; O3DIS-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
714 ; O3DIS-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
715 ; O3DIS-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
716 ; O3DIS-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
717 ; O3DIS-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
718 ; O3DIS-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
719 ; O3DIS-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
720 ; O3DIS-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
721 ; O3DIS-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
722 ; O3DIS-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
723 ; O3DIS-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
724 ; O3DIS-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
725 ; O3DIS-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
726 ; O3DIS-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
727 ; O3DIS-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
728 ; O3DIS-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
729 ; O3DIS-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
730 ; O3DIS-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
731 ; O3DIS-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
732 ; O3DIS-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
733 ; O3DIS-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
734 ; O3DIS-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
735 ; O3DIS-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
736 ; O3DIS-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
737 ; O3DIS-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
738 ; O3DIS-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
739 ; O3DIS-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
740 ; O3DIS-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
741 ; O3DIS-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
742 ; O3DIS-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
743 ; O3DIS-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
744 ; O3DIS-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
745 ; O3DIS-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
746 ; O3DIS-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
747 ; O3DIS-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
748 ; O3DIS-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
749 ; O3DIS-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
750 ; O3DIS-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
751 ; O3DIS-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
752 ; O3DIS-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
753 ; O3DIS-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
754 ; O3DIS-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
755 ; O3DIS-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
756 ; O3DIS-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
757 ; O3DIS-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
758 ; O3DIS-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
759 ; O3DIS-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
760 ; O3DIS-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
761 ; O3DIS-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
762 ; O3DIS-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
763 ; O3DIS-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
764 ; O3DIS-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
765 ; O3DIS-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
766 ; O3DIS-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
767 ; O3DIS-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
768 ; O3DIS-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
769 ; O3DIS-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
770 ; O3DIS-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
771 ; O3DIS-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
772 ; O3DIS-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
773 ; O3DIS-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
774 ; O3DIS-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
775 ; O3DIS-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
776 ; O3DIS-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
777 ; O3DIS-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
778 ; O3DIS-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
779 ; O3DIS-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
780 ; O3DIS-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
781 ; O3DIS-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
782 ; O3DIS-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
783 ; O3DIS-NEXT: ret i32 [[TMP46]]
788 for.body: ; preds = %for.body, %entry
789 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
790 %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
791 %0 = load i32, ptr %arrayidx, align 4
792 %add = add nsw i32 %0, %N
793 %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
794 store i32 %add, ptr %arrayidx2, align 4
795 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
796 %exitcond = icmp eq i64 %indvars.iv.next, 64
797 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
799 for.end: ; preds = %for.body
800 %1 = load i32, ptr %a, align 4
804 define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %N) {
805 ; O1-LABEL: @nopragma(
807 ; O1-NEXT: br label [[FOR_BODY:%.*]]
809 ; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
810 ; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
811 ; O1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
812 ; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
813 ; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
814 ; O1-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
815 ; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
816 ; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
817 ; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
819 ; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
820 ; O1-NEXT: ret i32 [[TMP1]]
822 ; O2-LABEL: @nopragma(
824 ; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
825 ; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
826 ; O2-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
827 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
828 ; O2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
829 ; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
830 ; O2-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
831 ; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
832 ; O2-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
833 ; O2-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
834 ; O2-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
835 ; O2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
836 ; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
837 ; O2-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
838 ; O2-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
839 ; O2-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
840 ; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
841 ; O2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
842 ; O2-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
843 ; O2-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
844 ; O2-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
845 ; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
846 ; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
847 ; O2-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
848 ; O2-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
849 ; O2-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
850 ; O2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
851 ; O2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
852 ; O2-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
853 ; O2-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
854 ; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
855 ; O2-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
856 ; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
857 ; O2-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
858 ; O2-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
859 ; O2-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
860 ; O2-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
861 ; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
862 ; O2-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
863 ; O2-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
864 ; O2-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
865 ; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
866 ; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
867 ; O2-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
868 ; O2-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
869 ; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
870 ; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
871 ; O2-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
872 ; O2-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
873 ; O2-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
874 ; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
875 ; O2-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
876 ; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
877 ; O2-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
878 ; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
879 ; O2-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
880 ; O2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
881 ; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
882 ; O2-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
883 ; O2-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
884 ; O2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
885 ; O2-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
886 ; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
887 ; O2-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
888 ; O2-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
889 ; O2-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
890 ; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
891 ; O2-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
892 ; O2-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
893 ; O2-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
894 ; O2-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
895 ; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
896 ; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
897 ; O2-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
898 ; O2-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
899 ; O2-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
900 ; O2-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
901 ; O2-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
902 ; O2-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
903 ; O2-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
904 ; O2-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
905 ; O2-NEXT: ret i32 [[TMP46]]
907 ; O3-LABEL: @nopragma(
909 ; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
910 ; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
911 ; O3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
912 ; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
913 ; O3-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
914 ; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
915 ; O3-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
916 ; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
917 ; O3-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
918 ; O3-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
919 ; O3-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
920 ; O3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
921 ; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
922 ; O3-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
923 ; O3-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
924 ; O3-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
925 ; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
926 ; O3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
927 ; O3-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
928 ; O3-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
929 ; O3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
930 ; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
931 ; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
932 ; O3-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
933 ; O3-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
934 ; O3-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
935 ; O3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
936 ; O3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
937 ; O3-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
938 ; O3-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
939 ; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
940 ; O3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
941 ; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
942 ; O3-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
943 ; O3-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
944 ; O3-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
945 ; O3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
946 ; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
947 ; O3-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
948 ; O3-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
949 ; O3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
950 ; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
951 ; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
952 ; O3-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
953 ; O3-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
954 ; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
955 ; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
956 ; O3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
957 ; O3-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
958 ; O3-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
959 ; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
960 ; O3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
961 ; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
962 ; O3-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
963 ; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
964 ; O3-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
965 ; O3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
966 ; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
967 ; O3-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
968 ; O3-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
969 ; O3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
970 ; O3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
971 ; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
972 ; O3-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
973 ; O3-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
974 ; O3-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
975 ; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
976 ; O3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
977 ; O3-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
978 ; O3-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
979 ; O3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
980 ; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
981 ; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
982 ; O3-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
983 ; O3-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
984 ; O3-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
985 ; O3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
986 ; O3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
987 ; O3-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
988 ; O3-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
989 ; O3-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
990 ; O3-NEXT: ret i32 [[TMP46]]
992 ; O3DEFAULT-LABEL: @nopragma(
993 ; O3DEFAULT-NEXT: entry:
994 ; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
995 ; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
996 ; O3DEFAULT-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
997 ; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
998 ; O3DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
999 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1000 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1001 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
1002 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
1003 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
1004 ; O3DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
1005 ; O3DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
1006 ; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
1007 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
1008 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1009 ; O3DEFAULT-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
1010 ; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
1011 ; O3DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
1012 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
1013 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
1014 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
1015 ; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
1016 ; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
1017 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
1018 ; O3DEFAULT-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1019 ; O3DEFAULT-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
1020 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
1021 ; O3DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
1022 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
1023 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
1024 ; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
1025 ; O3DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
1026 ; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
1027 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
1028 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1029 ; O3DEFAULT-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
1030 ; O3DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
1031 ; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
1032 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
1033 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
1034 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
1035 ; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
1036 ; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
1037 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
1038 ; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1039 ; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
1040 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
1041 ; O3DEFAULT-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
1042 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
1043 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
1044 ; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
1045 ; O3DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
1046 ; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
1047 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
1048 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1049 ; O3DEFAULT-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
1050 ; O3DEFAULT-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
1051 ; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
1052 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
1053 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
1054 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
1055 ; O3DEFAULT-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
1056 ; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
1057 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
1058 ; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1059 ; O3DEFAULT-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
1060 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
1061 ; O3DEFAULT-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
1062 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
1063 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
1064 ; O3DEFAULT-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
1065 ; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
1066 ; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
1067 ; O3DEFAULT-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
1068 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1069 ; O3DEFAULT-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
1070 ; O3DEFAULT-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
1071 ; O3DEFAULT-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
1072 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
1073 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
1074 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
1075 ; O3DEFAULT-NEXT: ret i32 [[TMP46]]
1077 ; Os-LABEL: @nopragma(
1079 ; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1080 ; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1081 ; Os-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 16
1082 ; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[B]], align 4
1083 ; Os-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
1084 ; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1085 ; Os-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1086 ; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[A:%.*]], i64 16
1087 ; Os-NEXT: store <4 x i32> [[TMP1]], ptr [[A]], align 4
1088 ; Os-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4
1089 ; Os-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
1090 ; Os-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
1091 ; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
1092 ; Os-NEXT: [[WIDE_LOAD1_1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
1093 ; Os-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]]
1094 ; Os-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_1]], [[BROADCAST_SPLAT]]
1095 ; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
1096 ; Os-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
1097 ; Os-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
1098 ; Os-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
1099 ; Os-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
1100 ; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
1101 ; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4
1102 ; Os-NEXT: [[WIDE_LOAD1_2:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
1103 ; Os-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]]
1104 ; Os-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_2]], [[BROADCAST_SPLAT]]
1105 ; Os-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
1106 ; Os-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
1107 ; Os-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4
1108 ; Os-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4
1109 ; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
1110 ; Os-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
1111 ; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
1112 ; Os-NEXT: [[WIDE_LOAD1_3:%.*]] = load <4 x i32>, ptr [[TMP17]], align 4
1113 ; Os-NEXT: [[TMP18:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]]
1114 ; Os-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_3]], [[BROADCAST_SPLAT]]
1115 ; Os-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
1116 ; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
1117 ; Os-NEXT: store <4 x i32> [[TMP18]], ptr [[TMP20]], align 4
1118 ; Os-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
1119 ; Os-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
1120 ; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
1121 ; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4
1122 ; Os-NEXT: [[WIDE_LOAD1_4:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
1123 ; Os-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]]
1124 ; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_4]], [[BROADCAST_SPLAT]]
1125 ; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
1126 ; Os-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
1127 ; Os-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
1128 ; Os-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP27]], align 4
1129 ; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
1130 ; Os-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
1131 ; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4
1132 ; Os-NEXT: [[WIDE_LOAD1_5:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4
1133 ; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]]
1134 ; Os-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_5]], [[BROADCAST_SPLAT]]
1135 ; Os-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
1136 ; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
1137 ; Os-NEXT: store <4 x i32> [[TMP30]], ptr [[TMP32]], align 4
1138 ; Os-NEXT: store <4 x i32> [[TMP31]], ptr [[TMP33]], align 4
1139 ; Os-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 192
1140 ; Os-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 208
1141 ; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, ptr [[TMP34]], align 4
1142 ; Os-NEXT: [[WIDE_LOAD1_6:%.*]] = load <4 x i32>, ptr [[TMP35]], align 4
1143 ; Os-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]]
1144 ; Os-NEXT: [[TMP37:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_6]], [[BROADCAST_SPLAT]]
1145 ; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 192
1146 ; Os-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 208
1147 ; Os-NEXT: store <4 x i32> [[TMP36]], ptr [[TMP38]], align 4
1148 ; Os-NEXT: store <4 x i32> [[TMP37]], ptr [[TMP39]], align 4
1149 ; Os-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 224
1150 ; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 240
1151 ; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, ptr [[TMP40]], align 4
1152 ; Os-NEXT: [[WIDE_LOAD1_7:%.*]] = load <4 x i32>, ptr [[TMP41]], align 4
1153 ; Os-NEXT: [[TMP42:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]]
1154 ; Os-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1_7]], [[BROADCAST_SPLAT]]
1155 ; Os-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 224
1156 ; Os-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 240
1157 ; Os-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP44]], align 4
1158 ; Os-NEXT: store <4 x i32> [[TMP43]], ptr [[TMP45]], align 4
1159 ; Os-NEXT: [[TMP46:%.*]] = load i32, ptr [[A]], align 4
1160 ; Os-NEXT: ret i32 [[TMP46]]
1162 ; Oz-LABEL: @nopragma(
1164 ; Oz-NEXT: br label [[FOR_BODY:%.*]]
1166 ; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1167 ; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1168 ; Oz-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1169 ; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1170 ; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1171 ; Oz-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1172 ; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1173 ; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1174 ; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1176 ; Oz-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1177 ; Oz-NEXT: ret i32 [[TMP1]]
1179 ; O1VEC2-LABEL: @nopragma(
1180 ; O1VEC2-NEXT: entry:
1181 ; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1182 ; O1VEC2: vector.ph:
1183 ; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1184 ; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1185 ; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]]
1186 ; O1VEC2: vector.body:
1187 ; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1188 ; O1VEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1189 ; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
1190 ; O1VEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
1191 ; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
1192 ; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
1193 ; O1VEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
1194 ; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1195 ; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1196 ; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
1197 ; O1VEC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
1198 ; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4
1199 ; O1VEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
1200 ; O1VEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
1201 ; O1VEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1202 ; O1VEC2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
1203 ; O1VEC2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1204 ; O1VEC2: middle.block:
1205 ; O1VEC2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1206 ; O1VEC2: scalar.ph:
1207 ; O1VEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1208 ; O1VEC2-NEXT: br label [[FOR_BODY:%.*]]
1210 ; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1211 ; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
1212 ; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1213 ; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]]
1214 ; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
1215 ; O1VEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1216 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1217 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1218 ; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
1220 ; O1VEC2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4
1221 ; O1VEC2-NEXT: ret i32 [[TMP11]]
1223 ; OzVEC2-LABEL: @nopragma(
1224 ; OzVEC2-NEXT: entry:
1225 ; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1226 ; OzVEC2: vector.ph:
1227 ; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1228 ; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1229 ; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]]
1230 ; OzVEC2: vector.body:
1231 ; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1232 ; OzVEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1233 ; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
1234 ; OzVEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
1235 ; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
1236 ; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
1237 ; OzVEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
1238 ; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1239 ; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
1240 ; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
1241 ; OzVEC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
1242 ; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4
1243 ; OzVEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
1244 ; OzVEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
1245 ; OzVEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1246 ; OzVEC2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
1247 ; OzVEC2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1248 ; OzVEC2: middle.block:
1249 ; OzVEC2-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1250 ; OzVEC2: scalar.ph:
1251 ; OzVEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1252 ; OzVEC2-NEXT: br label [[FOR_BODY:%.*]]
1254 ; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1255 ; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS_IV]]
1256 ; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1257 ; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[N]]
1258 ; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS_IV]]
1259 ; OzVEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1260 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1261 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1262 ; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
1264 ; OzVEC2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4
1265 ; OzVEC2-NEXT: ret i32 [[TMP11]]
1267 ; O3DIS-LABEL: @nopragma(
1268 ; O3DIS-NEXT: entry:
1269 ; O3DIS-NEXT: br label [[FOR_BODY:%.*]]
1271 ; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1272 ; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1273 ; O3DIS-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1274 ; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1275 ; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1276 ; O3DIS-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1277 ; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1278 ; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64
1279 ; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1281 ; O3DIS-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1282 ; O3DIS-NEXT: ret i32 [[TMP1]]
1287 for.body: ; preds = %for.body, %entry
1288 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1289 %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
1290 %0 = load i32, ptr %arrayidx, align 4
1291 %add = add nsw i32 %0, %N
1292 %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1293 store i32 %add, ptr %arrayidx2, align 4
1294 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1295 %exitcond = icmp eq i64 %indvars.iv.next, 64
1296 br i1 %exitcond, label %for.end, label %for.body
1298 for.end: ; preds = %for.body
1299 %1 = load i32, ptr %a, align 4
; @disabled: scalar loop that stores b[i] + N into a[i] for i in [0, 48) and
; then loads a[0] (the value the checked output returns). The back-edge branch
; is tagged with !llvm.loop !2.
; NOTE(review): !2 is not defined in this part of the file; it presumably wraps
; !3 (llvm.loop.vectorize.enable = false) -- confirm against the metadata list.
; Expected results per check prefix, as recorded by the assertions below:
;   * every prefix except O3DEFAULT keeps the scalar loop with trip count 48;
;   * O3DEFAULT has no loop at all: twelve <4 x i32> load/add/store groups at
;     byte offsets 0..176 (48 x i32 in total).
; NOTE(review): the O3DEFAULT vector code presumably comes from full unrolling
; followed by SLP vectorization rather than from the loop vectorizer (the O3 run
; differs from it only by -unroll-threshold=150) -- confirm.
; The check lines are autogenerated by utils/update_test_checks.py; regenerate
; them with that script instead of editing them by hand.
1303 define i32 @disabled(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %N) {
1304 ; O1-LABEL: @disabled(
1306 ; O1-NEXT: br label [[FOR_BODY:%.*]]
1308 ; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1309 ; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1310 ; O1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1311 ; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1312 ; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1313 ; O1-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1314 ; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1315 ; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1316 ; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1318 ; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1319 ; O1-NEXT: ret i32 [[TMP1]]
1321 ; O2-LABEL: @disabled(
1323 ; O2-NEXT: br label [[FOR_BODY:%.*]]
1325 ; O2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1326 ; O2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1327 ; O2-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1328 ; O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1329 ; O2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1330 ; O2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1331 ; O2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1332 ; O2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1333 ; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1335 ; O2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1336 ; O2-NEXT: ret i32 [[TMP1]]
1338 ; O3-LABEL: @disabled(
1340 ; O3-NEXT: br label [[FOR_BODY:%.*]]
1342 ; O3-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1343 ; O3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1344 ; O3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1345 ; O3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1346 ; O3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1347 ; O3-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1348 ; O3-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1349 ; O3-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1350 ; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1352 ; O3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1353 ; O3-NEXT: ret i32 [[TMP1]]
1355 ; O3DEFAULT-LABEL: @disabled(
1356 ; O3DEFAULT-NEXT: entry:
1357 ; O3DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
1358 ; O3DEFAULT-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0
1359 ; O3DEFAULT-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
1360 ; O3DEFAULT-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP0]], [[TMP2]]
1361 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP3]], ptr [[A:%.*]], align 4
1362 ; O3DEFAULT-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 16
1363 ; O3DEFAULT-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 16
1364 ; O3DEFAULT-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_4]], align 4
1365 ; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP2]]
1366 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], ptr [[ARRAYIDX2_4]], align 4
1367 ; O3DEFAULT-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 32
1368 ; O3DEFAULT-NEXT: [[ARRAYIDX2_8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 32
1369 ; O3DEFAULT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_8]], align 4
1370 ; O3DEFAULT-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP2]]
1371 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP7]], ptr [[ARRAYIDX2_8]], align 4
1372 ; O3DEFAULT-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 48
1373 ; O3DEFAULT-NEXT: [[ARRAYIDX2_12:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 48
1374 ; O3DEFAULT-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_12]], align 4
1375 ; O3DEFAULT-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP2]]
1376 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP9]], ptr [[ARRAYIDX2_12]], align 4
1377 ; O3DEFAULT-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 64
1378 ; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 64
1379 ; O3DEFAULT-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_16]], align 4
1380 ; O3DEFAULT-NEXT: [[TMP11:%.*]] = add nsw <4 x i32> [[TMP10]], [[TMP2]]
1381 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP11]], ptr [[ARRAYIDX2_16]], align 4
1382 ; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 80
1383 ; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 80
1384 ; O3DEFAULT-NEXT: [[TMP12:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_20]], align 4
1385 ; O3DEFAULT-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP12]], [[TMP2]]
1386 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP13]], ptr [[ARRAYIDX2_20]], align 4
1387 ; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 96
1388 ; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 96
1389 ; O3DEFAULT-NEXT: [[TMP14:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_24]], align 4
1390 ; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], [[TMP2]]
1391 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], ptr [[ARRAYIDX2_24]], align 4
1392 ; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 112
1393 ; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 112
1394 ; O3DEFAULT-NEXT: [[TMP16:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_28]], align 4
1395 ; O3DEFAULT-NEXT: [[TMP17:%.*]] = add nsw <4 x i32> [[TMP16]], [[TMP2]]
1396 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP17]], ptr [[ARRAYIDX2_28]], align 4
1397 ; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 128
1398 ; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 128
1399 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_32]], align 4
1400 ; O3DEFAULT-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[TMP18]], [[TMP2]]
1401 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP19]], ptr [[ARRAYIDX2_32]], align 4
1402 ; O3DEFAULT-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 144
1403 ; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 144
1404 ; O3DEFAULT-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_36]], align 4
1405 ; O3DEFAULT-NEXT: [[TMP21:%.*]] = add nsw <4 x i32> [[TMP20]], [[TMP2]]
1406 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP21]], ptr [[ARRAYIDX2_36]], align 4
1407 ; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 160
1408 ; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 160
1409 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_40]], align 4
1410 ; O3DEFAULT-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[TMP22]], [[TMP2]]
1411 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP23]], ptr [[ARRAYIDX2_40]], align 4
1412 ; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 176
1413 ; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 176
1414 ; O3DEFAULT-NEXT: [[TMP24:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_44]], align 4
1415 ; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[TMP24]], [[TMP2]]
1416 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], ptr [[ARRAYIDX2_44]], align 4
1417 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = load i32, ptr [[A]], align 4
1418 ; O3DEFAULT-NEXT: ret i32 [[TMP26]]
1420 ; Os-LABEL: @disabled(
1422 ; Os-NEXT: br label [[FOR_BODY:%.*]]
1424 ; Os-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1425 ; Os-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1426 ; Os-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1427 ; Os-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1428 ; Os-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1429 ; Os-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1430 ; Os-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1431 ; Os-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1432 ; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1434 ; Os-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1435 ; Os-NEXT: ret i32 [[TMP1]]
1437 ; Oz-LABEL: @disabled(
1439 ; Oz-NEXT: br label [[FOR_BODY:%.*]]
1441 ; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1442 ; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1443 ; Oz-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1444 ; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1445 ; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1446 ; Oz-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1447 ; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1448 ; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1449 ; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1451 ; Oz-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1452 ; Oz-NEXT: ret i32 [[TMP1]]
1454 ; O1VEC2-LABEL: @disabled(
1455 ; O1VEC2-NEXT: entry:
1456 ; O1VEC2-NEXT: br label [[FOR_BODY:%.*]]
1458 ; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1459 ; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1460 ; O1VEC2-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1461 ; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1462 ; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1463 ; O1VEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1464 ; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1465 ; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1466 ; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
1468 ; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1469 ; O1VEC2-NEXT: ret i32 [[TMP1]]
1471 ; OzVEC2-LABEL: @disabled(
1472 ; OzVEC2-NEXT: entry:
1473 ; OzVEC2-NEXT: br label [[FOR_BODY:%.*]]
1475 ; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1476 ; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1477 ; OzVEC2-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1478 ; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1479 ; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1480 ; OzVEC2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1481 ; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1482 ; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1483 ; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
1485 ; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1486 ; OzVEC2-NEXT: ret i32 [[TMP1]]
1488 ; O3DIS-LABEL: @disabled(
1489 ; O3DIS-NEXT: entry:
1490 ; O3DIS-NEXT: br label [[FOR_BODY:%.*]]
1492 ; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1493 ; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDVARS_IV]]
1494 ; O3DIS-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1495 ; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]]
1496 ; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
1497 ; O3DIS-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
1498 ; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1499 ; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48
1500 ; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
1502 ; O3DIS-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
1503 ; O3DIS-NEXT: ret i32 [[TMP1]]
1508 for.body: ; preds = %for.body, %entry
1509 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
1510 %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
1511 %0 = load i32, ptr %arrayidx, align 4
1512 %add = add nsw i32 %0, %N
1513 %arrayidx2 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1514 store i32 %add, ptr %arrayidx2, align 4
1515 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Constant trip count: the loop exits once the incremented index reaches 48.
1516 %exitcond = icmp eq i64 %indvars.iv.next, 48
; Back edge of the loop; !2 is the loop metadata attached to it.
1517 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
1519 for.end: ; preds = %for.body
1520 %1 = load i32, ptr %a, align 4
; Loop hint: vectorization explicitly enabled (i1 1). Per the header of this
; file, this is the hint expected to force vectorization even when the
; optimization level is too low or vectorization is turned off.
; NOTE(review): presumably referenced through !0 by @enabled -- confirm.
1525 !1 = !{!"llvm.loop.vectorize.enable", i1 1}
; Loop hint: vectorization explicitly disabled (i1 0).
; NOTE(review): presumably referenced through !2 by the loop in @disabled -- confirm.
1527 !3 = !{!"llvm.loop.vectorize.enable", i1 0}