1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -scalarize-min-bits=16 -S | FileCheck %s --check-prefixes=CHECK,MIN16
3 ; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -scalarize-min-bits=32 -S | FileCheck %s --check-prefixes=CHECK,MIN32
4 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Loads a <2 x i16> from %pa and from %pb, adds them, and stores the sum to %pa.
; MIN16 checks expect the loads, add and store to be split into two i16
; operations; MIN32 checks expect the <2 x i16> operations to be left whole.
6 define void @load_add_store_v2i16(ptr %pa, ptr %pb) {
7 ; MIN16-LABEL: @load_add_store_v2i16(
8 ; MIN16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
9 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
10 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
11 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
12 ; MIN16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
13 ; MIN16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
14 ; MIN16-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
15 ; MIN16-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
16 ; MIN16-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8
17 ; MIN16-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2
18 ; MIN16-NEXT: ret void
20 ; MIN32-LABEL: @load_add_store_v2i16(
21 ; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
22 ; MIN32-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 8
23 ; MIN32-NEXT: [[C:%.*]] = add <2 x i16> [[A]], [[B]]
24 ; MIN32-NEXT: store <2 x i16> [[C]], ptr [[PA]], align 8
25 ; MIN32-NEXT: ret void
27 %a = load <2 x i16>, ptr %pa, align 8
28 %b = load <2 x i16>, ptr %pb, align 8
29 %c = add <2 x i16> %a, %b
30 store <2 x i16> %c, ptr %pa, align 8
; Loads a <3 x i16> from %pa and from %pb, adds them, and stores the sum to %pa.
; MIN16 checks expect three i16 pieces; MIN32 checks expect a <2 x i16> piece
; followed by a single trailing i16 piece (addressed via a <2 x i16> GEP).
34 define void @load_add_store_v3i16(ptr %pa, ptr %pb) {
35 ; MIN16-LABEL: @load_add_store_v3i16(
36 ; MIN16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
37 ; MIN16-NEXT: [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
38 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
39 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
40 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
41 ; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
42 ; MIN16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
43 ; MIN16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
44 ; MIN16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
45 ; MIN16-NEXT: [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
46 ; MIN16-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
47 ; MIN16-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
48 ; MIN16-NEXT: [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
49 ; MIN16-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8
50 ; MIN16-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2
51 ; MIN16-NEXT: store i16 [[C_I2]], ptr [[PA_I2]], align 4
52 ; MIN16-NEXT: ret void
54 ; MIN32-LABEL: @load_add_store_v3i16(
55 ; MIN32-NEXT: [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1
56 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
57 ; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
58 ; MIN32-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 4
59 ; MIN32-NEXT: [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8
60 ; MIN32-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 4
61 ; MIN32-NEXT: [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]]
62 ; MIN32-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
63 ; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8
64 ; MIN32-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 4
65 ; MIN32-NEXT: ret void
67 %a = load <3 x i16>, ptr %pa, align 8
68 %b = load <3 x i16>, ptr %pb, align 8
69 %c = add <3 x i16> %a, %b
70 store <3 x i16> %c, ptr %pa, align 8
; Loads a <4 x i16> from %pa and from %pb, adds them, and stores the sum to %pa.
; MIN16 checks expect four i16 pieces; MIN32 checks expect two <2 x i16>
; pieces, the second addressed via a <2 x i16> GEP with index 1.
74 define void @load_add_store_v4i16(ptr %pa, ptr %pb) {
75 ; MIN16-LABEL: @load_add_store_v4i16(
76 ; MIN16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
77 ; MIN16-NEXT: [[PB_I2:%.*]] = getelementptr i16, ptr [[PB]], i32 2
78 ; MIN16-NEXT: [[PB_I3:%.*]] = getelementptr i16, ptr [[PB]], i32 3
79 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
80 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
81 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
82 ; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
83 ; MIN16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
84 ; MIN16-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
85 ; MIN16-NEXT: [[A_I3:%.*]] = load i16, ptr [[PA_I3]], align 2
86 ; MIN16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 8
87 ; MIN16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
88 ; MIN16-NEXT: [[B_I2:%.*]] = load i16, ptr [[PB_I2]], align 4
89 ; MIN16-NEXT: [[B_I3:%.*]] = load i16, ptr [[PB_I3]], align 2
90 ; MIN16-NEXT: [[C_I0:%.*]] = add i16 [[A_I0]], [[B_I0]]
91 ; MIN16-NEXT: [[C_I1:%.*]] = add i16 [[A_I1]], [[B_I1]]
92 ; MIN16-NEXT: [[C_I2:%.*]] = add i16 [[A_I2]], [[B_I2]]
93 ; MIN16-NEXT: [[C_I3:%.*]] = add i16 [[A_I3]], [[B_I3]]
94 ; MIN16-NEXT: store i16 [[C_I0]], ptr [[PA]], align 8
95 ; MIN16-NEXT: store i16 [[C_I1]], ptr [[PA_I1]], align 2
96 ; MIN16-NEXT: store i16 [[C_I2]], ptr [[PA_I2]], align 4
97 ; MIN16-NEXT: store i16 [[C_I3]], ptr [[PA_I3]], align 2
98 ; MIN16-NEXT: ret void
100 ; MIN32-LABEL: @load_add_store_v4i16(
101 ; MIN32-NEXT: [[PB_I1:%.*]] = getelementptr <2 x i16>, ptr [[PB:%.*]], i32 1
102 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
103 ; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
104 ; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
105 ; MIN32-NEXT: [[B_I0:%.*]] = load <2 x i16>, ptr [[PB]], align 8
106 ; MIN32-NEXT: [[B_I1:%.*]] = load <2 x i16>, ptr [[PB_I1]], align 4
107 ; MIN32-NEXT: [[C_I0:%.*]] = add <2 x i16> [[A_I0]], [[B_I0]]
108 ; MIN32-NEXT: [[C_I1:%.*]] = add <2 x i16> [[A_I1]], [[B_I1]]
109 ; MIN32-NEXT: store <2 x i16> [[C_I0]], ptr [[PA]], align 8
110 ; MIN32-NEXT: store <2 x i16> [[C_I1]], ptr [[PA_I1]], align 4
111 ; MIN32-NEXT: ret void
113 %a = load <4 x i16>, ptr %pa, align 8
114 %b = load <4 x i16>, ptr %pb, align 8
115 %c = add <4 x i16> %a, %b
116 store <4 x i16> %c, ptr %pa, align 8
; Loads a <4 x i10> from %pa and from %pb, adds them, and stores the sum to %pa.
; Under both prefixes the checks keep the <4 x i10> loads and the store whole;
; only the add is split. MIN16 expects four i10 adds; MIN32 expects a
; <3 x i10> add plus one i10 add. The result vector is rebuilt before the store.
; NOTE(review): presumably the memory ops stay whole because i10 elements are
; not byte-sized -- confirm against the scalarizer's load/store layout rules.
120 define void @load_add_store_v4i10(ptr %pa, ptr %pb) {
121 ; MIN16-LABEL: @load_add_store_v4i10(
122 ; MIN16-NEXT: [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
123 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x i10> [[A]], i64 0
124 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x i10> [[A]], i64 1
125 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x i10> [[A]], i64 2
126 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x i10> [[A]], i64 3
127 ; MIN16-NEXT: [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
128 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <4 x i10> [[B]], i64 0
129 ; MIN16-NEXT: [[C_I0:%.*]] = add i10 [[A_I0]], [[B_I0]]
130 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 1
131 ; MIN16-NEXT: [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
132 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <4 x i10> [[B]], i64 2
133 ; MIN16-NEXT: [[C_I2:%.*]] = add i10 [[A_I2]], [[B_I2]]
134 ; MIN16-NEXT: [[B_I3:%.*]] = extractelement <4 x i10> [[B]], i64 3
135 ; MIN16-NEXT: [[C_I3:%.*]] = add i10 [[A_I3]], [[B_I3]]
136 ; MIN16-NEXT: [[C_UPTO0:%.*]] = insertelement <4 x i10> poison, i10 [[C_I0]], i64 0
137 ; MIN16-NEXT: [[C_UPTO1:%.*]] = insertelement <4 x i10> [[C_UPTO0]], i10 [[C_I1]], i64 1
138 ; MIN16-NEXT: [[C_UPTO2:%.*]] = insertelement <4 x i10> [[C_UPTO1]], i10 [[C_I2]], i64 2
139 ; MIN16-NEXT: [[C:%.*]] = insertelement <4 x i10> [[C_UPTO2]], i10 [[C_I3]], i64 3
140 ; MIN16-NEXT: store <4 x i10> [[C]], ptr [[PA]], align 8
141 ; MIN16-NEXT: ret void
143 ; MIN32-LABEL: @load_add_store_v4i10(
144 ; MIN32-NEXT: [[A:%.*]] = load <4 x i10>, ptr [[PA:%.*]], align 8
145 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x i10> [[A]], <4 x i10> poison, <3 x i32> <i32 0, i32 1, i32 2>
146 ; MIN32-NEXT: [[A_I1:%.*]] = extractelement <4 x i10> [[A]], i64 3
147 ; MIN32-NEXT: [[B:%.*]] = load <4 x i10>, ptr [[PB:%.*]], align 8
148 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x i10> [[B]], <4 x i10> poison, <3 x i32> <i32 0, i32 1, i32 2>
149 ; MIN32-NEXT: [[C_I0:%.*]] = add <3 x i10> [[A_I0]], [[B_I0]]
150 ; MIN32-NEXT: [[B_I1:%.*]] = extractelement <4 x i10> [[B]], i64 3
151 ; MIN32-NEXT: [[C_I1:%.*]] = add i10 [[A_I1]], [[B_I1]]
152 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <3 x i10> [[C_I0]], <3 x i10> [[C_I0]], <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
153 ; MIN32-NEXT: [[C:%.*]] = insertelement <4 x i10> [[TMP1]], i10 [[C_I1]], i64 3
154 ; MIN32-NEXT: store <4 x i10> [[C]], ptr [[PA]], align 8
155 ; MIN32-NEXT: ret void
157 %a = load <4 x i10>, ptr %pa, align 8
158 %b = load <4 x i10>, ptr %pb, align 8
159 %c = add <4 x i10> %a, %b
160 store <4 x i10> %c, ptr %pa, align 8
; Selects between two <2 x half> vectors using a single scalar i1 condition.
; MIN16 checks expect two scalar half selects rebuilt with insertelement;
; MIN32 checks expect the <2 x half> select to be left whole.
164 define <2 x half> @select_uniform_condition_v2f16(<2 x half> %a, <2 x half> %b, i1 %cc) {
165 ; MIN16-LABEL: @select_uniform_condition_v2f16(
166 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
167 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
168 ; MIN16-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
169 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
170 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
171 ; MIN16-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
172 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
173 ; MIN16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
174 ; MIN16-NEXT: ret <2 x half> [[R]]
176 ; MIN32-LABEL: @select_uniform_condition_v2f16(
177 ; MIN32-NEXT: [[R:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A:%.*]], <2 x half> [[B:%.*]]
178 ; MIN32-NEXT: ret <2 x half> [[R]]
180 %r = select i1 %cc, <2 x half> %a, <2 x half> %b
; Selects between two <3 x half> vectors using a single scalar i1 condition.
; MIN16 checks expect three scalar half selects; MIN32 checks expect a
; <2 x half> select plus one scalar half select, recombined into <3 x half>.
184 define <3 x half> @select_uniform_condition_v3f16(<3 x half> %a, <3 x half> %b, i1 %cc) {
185 ; MIN16-LABEL: @select_uniform_condition_v3f16(
186 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
187 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
188 ; MIN16-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
189 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
190 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
191 ; MIN16-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
192 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
193 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
194 ; MIN16-NEXT: [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
195 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
196 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
197 ; MIN16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
198 ; MIN16-NEXT: ret <3 x half> [[R]]
200 ; MIN32-LABEL: @select_uniform_condition_v3f16(
201 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
202 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
203 ; MIN32-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]]
204 ; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
205 ; MIN32-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
206 ; MIN32-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
207 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
208 ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
209 ; MIN32-NEXT: ret <3 x half> [[R]]
211 %r = select i1 %cc, <3 x half> %a, <3 x half> %b
; Selects between two <4 x half> vectors using a single scalar i1 condition.
; MIN16 checks expect four scalar half selects; MIN32 checks expect two
; <2 x half> selects whose results are widened and merged by shufflevector.
215 define <4 x half> @select_uniform_condition_v4f16(<4 x half> %a, <4 x half> %b, i1 %cc) {
216 ; MIN16-LABEL: @select_uniform_condition_v4f16(
217 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
218 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
219 ; MIN16-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], half [[A_I0]], half [[B_I0]]
220 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
221 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
222 ; MIN16-NEXT: [[R_I1:%.*]] = select i1 [[CC]], half [[A_I1]], half [[B_I1]]
223 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
224 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
225 ; MIN16-NEXT: [[R_I2:%.*]] = select i1 [[CC]], half [[A_I2]], half [[B_I2]]
226 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
227 ; MIN16-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
228 ; MIN16-NEXT: [[R_I3:%.*]] = select i1 [[CC]], half [[A_I3]], half [[B_I3]]
229 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
230 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
231 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
232 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
233 ; MIN16-NEXT: ret <4 x half> [[R]]
235 ; MIN32-LABEL: @select_uniform_condition_v4f16(
236 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
237 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
238 ; MIN32-NEXT: [[R_I0:%.*]] = select i1 [[CC:%.*]], <2 x half> [[A_I0]], <2 x half> [[B_I0]]
239 ; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
240 ; MIN32-NEXT: [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
241 ; MIN32-NEXT: [[R_I1:%.*]] = select i1 [[CC]], <2 x half> [[A_I1]], <2 x half> [[B_I1]]
242 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
243 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
244 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
245 ; MIN32-NEXT: ret <4 x half> [[R]]
247 %r = select i1 %cc, <4 x half> %a, <4 x half> %b
; Selects between two <4 x half> vectors using a <4 x i1> vector condition.
; The shared CHECK prefix expects the select to be left whole under both RUN
; configurations.
251 define <4 x half> @select_vector_condition_v4f16(<4 x half> %a, <4 x half> %b, <4 x i1> %cc) {
252 ; CHECK-LABEL: @select_vector_condition_v4f16(
253 ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CC:%.*]], <4 x half> [[A:%.*]], <4 x half> [[B:%.*]]
254 ; CHECK-NEXT: ret <4 x half> [[R]]
256 %r = select <4 x i1> %cc, <4 x half> %a, <4 x half> %b
; Applies fneg to a <2 x half> vector.
; MIN16 checks expect two scalar half fneg operations rebuilt with
; insertelement; MIN32 checks expect the <2 x half> fneg to be left whole.
260 define <2 x half> @unary_v2f16(<2 x half> %a) {
261 ; MIN16-LABEL: @unary_v2f16(
262 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
263 ; MIN16-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]]
264 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
265 ; MIN16-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]]
266 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
267 ; MIN16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
268 ; MIN16-NEXT: ret <2 x half> [[R]]
270 ; MIN32-LABEL: @unary_v2f16(
271 ; MIN32-NEXT: [[R:%.*]] = fneg <2 x half> [[A:%.*]]
272 ; MIN32-NEXT: ret <2 x half> [[R]]
274 %r = fneg <2 x half> %a
; Applies fneg to a <3 x half> vector.
; MIN16 checks expect three scalar half fneg operations; MIN32 checks expect a
; <2 x half> fneg plus one scalar half fneg, recombined into <3 x half>.
278 define <3 x half> @unary_v3f16(<3 x half> %a) {
279 ; MIN16-LABEL: @unary_v3f16(
280 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
281 ; MIN16-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]]
282 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
283 ; MIN16-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]]
284 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
285 ; MIN16-NEXT: [[R_I2:%.*]] = fneg half [[A_I2]]
286 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
287 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
288 ; MIN16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
289 ; MIN16-NEXT: ret <3 x half> [[R]]
291 ; MIN32-LABEL: @unary_v3f16(
292 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
293 ; MIN32-NEXT: [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
294 ; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
295 ; MIN32-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]]
296 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
297 ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
298 ; MIN32-NEXT: ret <3 x half> [[R]]
300 %r = fneg <3 x half> %a
; Applies fneg to a <4 x half> vector.
; MIN16 checks expect four scalar half fneg operations; MIN32 checks expect two
; <2 x half> fneg operations whose results are merged by shufflevector.
304 define <4 x half> @unary_v4f16(<4 x half> %a) {
305 ; MIN16-LABEL: @unary_v4f16(
306 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
307 ; MIN16-NEXT: [[R_I0:%.*]] = fneg half [[A_I0]]
308 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
309 ; MIN16-NEXT: [[R_I1:%.*]] = fneg half [[A_I1]]
310 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
311 ; MIN16-NEXT: [[R_I2:%.*]] = fneg half [[A_I2]]
312 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
313 ; MIN16-NEXT: [[R_I3:%.*]] = fneg half [[A_I3]]
314 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
315 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
316 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
317 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
318 ; MIN16-NEXT: ret <4 x half> [[R]]
320 ; MIN32-LABEL: @unary_v4f16(
321 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
322 ; MIN32-NEXT: [[R_I0:%.*]] = fneg <2 x half> [[A_I0]]
323 ; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
324 ; MIN32-NEXT: [[R_I1:%.*]] = fneg <2 x half> [[A_I1]]
325 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
326 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
327 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
328 ; MIN32-NEXT: ret <4 x half> [[R]]
330 %r = fneg <4 x half> %a
; Applies fadd to two <2 x half> vectors.
; MIN16 checks expect two scalar half fadd operations rebuilt with
; insertelement; MIN32 checks expect the <2 x half> fadd to be left whole.
334 define <2 x half> @binary_v2f16(<2 x half> %a, <2 x half> %b) {
335 ; MIN16-LABEL: @binary_v2f16(
336 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
337 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
338 ; MIN16-NEXT: [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
339 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
340 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
341 ; MIN16-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
342 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
343 ; MIN16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
344 ; MIN16-NEXT: ret <2 x half> [[R]]
346 ; MIN32-LABEL: @binary_v2f16(
347 ; MIN32-NEXT: [[R:%.*]] = fadd <2 x half> [[A:%.*]], [[B:%.*]]
348 ; MIN32-NEXT: ret <2 x half> [[R]]
350 %r = fadd <2 x half> %a, %b
; Applies fadd to two <3 x half> vectors.
; MIN16 checks expect three scalar half fadd operations; MIN32 checks expect a
; <2 x half> fadd plus one scalar half fadd, recombined into <3 x half>.
354 define <3 x half> @binary_v3f16(<3 x half> %a, <3 x half> %b) {
355 ; MIN16-LABEL: @binary_v3f16(
356 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
357 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
358 ; MIN16-NEXT: [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
359 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
360 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
361 ; MIN16-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
362 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
363 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
364 ; MIN16-NEXT: [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
365 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
366 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
367 ; MIN16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
368 ; MIN16-NEXT: ret <3 x half> [[R]]
370 ; MIN32-LABEL: @binary_v3f16(
371 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
372 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
373 ; MIN32-NEXT: [[R_I0:%.*]] = fadd <2 x half> [[A_I0]], [[B_I0]]
374 ; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
375 ; MIN32-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
376 ; MIN32-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
377 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
378 ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
379 ; MIN32-NEXT: ret <3 x half> [[R]]
381 %r = fadd <3 x half> %a, %b
; Applies fadd to two <4 x half> vectors.
; MIN16 checks expect four scalar half fadd operations; MIN32 checks expect two
; <2 x half> fadd operations whose results are merged by shufflevector.
385 define <4 x half> @binary_v4f16(<4 x half> %a, <4 x half> %b) {
386 ; MIN16-LABEL: @binary_v4f16(
387 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
388 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
389 ; MIN16-NEXT: [[R_I0:%.*]] = fadd half [[A_I0]], [[B_I0]]
390 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
391 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
392 ; MIN16-NEXT: [[R_I1:%.*]] = fadd half [[A_I1]], [[B_I1]]
393 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
394 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
395 ; MIN16-NEXT: [[R_I2:%.*]] = fadd half [[A_I2]], [[B_I2]]
396 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
397 ; MIN16-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
398 ; MIN16-NEXT: [[R_I3:%.*]] = fadd half [[A_I3]], [[B_I3]]
399 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
400 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
401 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
402 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
403 ; MIN16-NEXT: ret <4 x half> [[R]]
405 ; MIN32-LABEL: @binary_v4f16(
406 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
407 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
408 ; MIN32-NEXT: [[R_I0:%.*]] = fadd <2 x half> [[A_I0]], [[B_I0]]
409 ; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
410 ; MIN32-NEXT: [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
411 ; MIN32-NEXT: [[R_I1:%.*]] = fadd <2 x half> [[A_I1]], [[B_I1]]
412 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
413 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
414 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
415 ; MIN32-NEXT: ret <4 x half> [[R]]
417 %r = fadd <4 x half> %a, %b
; Converts a <2 x half> vector to <2 x i16> with fptosi.
; MIN16 checks expect two scalar half-to-i16 conversions rebuilt with
; insertelement; MIN32 checks expect the vector fptosi to be left whole.
421 define <2 x i16> @fptosi_v2f16(<2 x half> %a) {
422 ; MIN16-LABEL: @fptosi_v2f16(
423 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
424 ; MIN16-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
425 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
426 ; MIN16-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
427 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[R_I0]], i64 0
428 ; MIN16-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
429 ; MIN16-NEXT: ret <2 x i16> [[R]]
431 ; MIN32-LABEL: @fptosi_v2f16(
432 ; MIN32-NEXT: [[R:%.*]] = fptosi <2 x half> [[A:%.*]] to <2 x i16>
433 ; MIN32-NEXT: ret <2 x i16> [[R]]
435 %r = fptosi <2 x half> %a to <2 x i16>
; Converts a <3 x half> vector to <3 x i16> with fptosi.
; MIN16 checks expect three scalar conversions; MIN32 checks expect a
; <2 x half> conversion plus one scalar conversion, recombined into <3 x i16>.
439 define <3 x i16> @fptosi_v3f16(<3 x half> %a) {
440 ; MIN16-LABEL: @fptosi_v3f16(
441 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
442 ; MIN16-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
443 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
444 ; MIN16-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
445 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
446 ; MIN16-NEXT: [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
447 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[R_I0]], i64 0
448 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
449 ; MIN16-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
450 ; MIN16-NEXT: ret <3 x i16> [[R]]
452 ; MIN32-LABEL: @fptosi_v3f16(
453 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
454 ; MIN32-NEXT: [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
455 ; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
456 ; MIN32-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
457 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
458 ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[R_I1]], i64 2
459 ; MIN32-NEXT: ret <3 x i16> [[R]]
461 %r = fptosi <3 x half> %a to <3 x i16>
; Converts a <4 x half> vector to <4 x i16> with fptosi.
; MIN16 checks expect four scalar conversions; MIN32 checks expect two
; <2 x half> conversions whose results are merged by shufflevector.
465 define <4 x i16> @fptosi_v4f16(<4 x half> %a) {
466 ; MIN16-LABEL: @fptosi_v4f16(
467 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
468 ; MIN16-NEXT: [[R_I0:%.*]] = fptosi half [[A_I0]] to i16
469 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
470 ; MIN16-NEXT: [[R_I1:%.*]] = fptosi half [[A_I1]] to i16
471 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
472 ; MIN16-NEXT: [[R_I2:%.*]] = fptosi half [[A_I2]] to i16
473 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
474 ; MIN16-NEXT: [[R_I3:%.*]] = fptosi half [[A_I3]] to i16
475 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[R_I0]], i64 0
476 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[R_I1]], i64 1
477 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[R_I2]], i64 2
478 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[R_I3]], i64 3
479 ; MIN16-NEXT: ret <4 x i16> [[R]]
481 ; MIN32-LABEL: @fptosi_v4f16(
482 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
483 ; MIN32-NEXT: [[R_I0:%.*]] = fptosi <2 x half> [[A_I0]] to <2 x i16>
484 ; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
485 ; MIN32-NEXT: [[R_I1:%.*]] = fptosi <2 x half> [[A_I1]] to <2 x i16>
486 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[R_I0]], <2 x i16> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
487 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[R_I1]], <2 x i16> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
488 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
489 ; MIN32-NEXT: ret <4 x i16> [[R]]
491 %r = fptosi <4 x half> %a to <4 x i16>
; Extends a <4 x half> vector to <4 x float> with fpext.
; MIN16 checks expect four scalar half-to-float conversions rebuilt with
; insertelement; MIN32 checks expect the vector fpext to be left whole.
; NOTE(review): presumably left whole under MIN32 because the source and
; result element widths differ (16 vs 32 bits) -- confirm against the pass.
495 define <4 x float> @fpext_v4f16(<4 x half> %a) {
496 ; MIN16-LABEL: @fpext_v4f16(
497 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
498 ; MIN16-NEXT: [[R_I0:%.*]] = fpext half [[A_I0]] to float
499 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
500 ; MIN16-NEXT: [[R_I1:%.*]] = fpext half [[A_I1]] to float
501 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
502 ; MIN16-NEXT: [[R_I2:%.*]] = fpext half [[A_I2]] to float
503 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
504 ; MIN16-NEXT: [[R_I3:%.*]] = fpext half [[A_I3]] to float
505 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x float> poison, float [[R_I0]], i64 0
506 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x float> [[R_UPTO0]], float [[R_I1]], i64 1
507 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x float> [[R_UPTO1]], float [[R_I2]], i64 2
508 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x float> [[R_UPTO2]], float [[R_I3]], i64 3
509 ; MIN16-NEXT: ret <4 x float> [[R]]
511 ; MIN32-LABEL: @fpext_v4f16(
512 ; MIN32-NEXT: [[R:%.*]] = fpext <4 x half> [[A:%.*]] to <4 x float>
513 ; MIN32-NEXT: ret <4 x float> [[R]]
515 %r = fpext <4 x half> %a to <4 x float>
; Compares two <4 x i16> vectors with icmp ugt, producing <4 x i1>.
; Despite the "f16" in the function name, the operands are integer vectors.
; The shared CHECK prefix expects the compare to be left whole under both RUN
; configurations.
519 define <4 x i1> @icmp_v4f16(<4 x i16> %a, <4 x i16> %b) {
520 ; CHECK-LABEL: @icmp_v4f16(
521 ; CHECK-NEXT: [[R:%.*]] = icmp ugt <4 x i16> [[A:%.*]], [[B:%.*]]
522 ; CHECK-NEXT: ret <4 x i1> [[R]]
524 %r = icmp ugt <4 x i16> %a, %b
; GEP with a scalar base pointer and a <4 x i16> index vector.
; MIN16 checks expect four scalar GEPs (one per extracted index) rebuilt into
; a <4 x ptr>; MIN32 checks expect the vector GEP to be left whole.
528 define <4 x ptr> @gep1_v4(ptr %base, <4 x i16> %a) {
529 ; MIN16-LABEL: @gep1_v4(
530 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
531 ; MIN16-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i16 [[A_I0]]
532 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
533 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I1]]
534 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
535 ; MIN16-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I2]]
536 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
537 ; MIN16-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE]], i16 [[A_I3]]
538 ; MIN16-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
539 ; MIN16-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
540 ; MIN16-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
541 ; MIN16-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
542 ; MIN16-NEXT: ret <4 x ptr> [[P]]
544 ; MIN32-LABEL: @gep1_v4(
545 ; MIN32-NEXT: [[P:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i16> [[A:%.*]]
546 ; MIN32-NEXT: ret <4 x ptr> [[P]]
548 %p = getelementptr i32, ptr %base, <4 x i16> %a
; GEP with a <4 x ptr> base vector and a scalar i16 index.
; The shared CHECK prefix expects four scalar GEPs (one per extracted base
; pointer) rebuilt into a <4 x ptr> under both RUN configurations.
552 define <4 x ptr> @gep2_v4(<4 x ptr> %base, i16 %a) {
553 ; CHECK-LABEL: @gep2_v4(
554 ; CHECK-NEXT: [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
555 ; CHECK-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A:%.*]]
556 ; CHECK-NEXT: [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
557 ; CHECK-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A]]
558 ; CHECK-NEXT: [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
559 ; CHECK-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A]]
560 ; CHECK-NEXT: [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
561 ; CHECK-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A]]
562 ; CHECK-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
563 ; CHECK-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
564 ; CHECK-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
565 ; CHECK-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
566 ; CHECK-NEXT: ret <4 x ptr> [[P]]
568 %p = getelementptr i32, <4 x ptr> %base, i16 %a
; GEP with a vector of base pointers and a vector of i16 indices.
; MIN16 expects four scalar GEPs built from per-lane extracts of both operands;
; MIN32 expects the vector GEP to be left unchanged.
572 define <4 x ptr> @gep3_v4(<4 x ptr> %base, <4 x i16> %a) {
573 ; MIN16-LABEL: @gep3_v4(
574 ; MIN16-NEXT: [[BASE_I0:%.*]] = extractelement <4 x ptr> [[BASE:%.*]], i64 0
575 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
576 ; MIN16-NEXT: [[P_I0:%.*]] = getelementptr i32, ptr [[BASE_I0]], i16 [[A_I0]]
577 ; MIN16-NEXT: [[BASE_I1:%.*]] = extractelement <4 x ptr> [[BASE]], i64 1
578 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
579 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr i32, ptr [[BASE_I1]], i16 [[A_I1]]
580 ; MIN16-NEXT: [[BASE_I2:%.*]] = extractelement <4 x ptr> [[BASE]], i64 2
581 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
582 ; MIN16-NEXT: [[P_I2:%.*]] = getelementptr i32, ptr [[BASE_I2]], i16 [[A_I2]]
583 ; MIN16-NEXT: [[BASE_I3:%.*]] = extractelement <4 x ptr> [[BASE]], i64 3
584 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x i16> [[A]], i64 3
585 ; MIN16-NEXT: [[P_I3:%.*]] = getelementptr i32, ptr [[BASE_I3]], i16 [[A_I3]]
586 ; MIN16-NEXT: [[P_UPTO0:%.*]] = insertelement <4 x ptr> poison, ptr [[P_I0]], i64 0
587 ; MIN16-NEXT: [[P_UPTO1:%.*]] = insertelement <4 x ptr> [[P_UPTO0]], ptr [[P_I1]], i64 1
588 ; MIN16-NEXT: [[P_UPTO2:%.*]] = insertelement <4 x ptr> [[P_UPTO1]], ptr [[P_I2]], i64 2
589 ; MIN16-NEXT: [[P:%.*]] = insertelement <4 x ptr> [[P_UPTO2]], ptr [[P_I3]], i64 3
590 ; MIN16-NEXT: ret <4 x ptr> [[P]]
592 ; MIN32-LABEL: @gep3_v4(
593 ; MIN32-NEXT: [[P:%.*]] = getelementptr i32, <4 x ptr> [[BASE:%.*]], <4 x i16> [[A:%.*]]
594 ; MIN32-NEXT: ret <4 x ptr> [[P]]
596 %p = getelementptr i32, <4 x ptr> %base, <4 x i16> %a
; Inserts %b into lane 1 of a <2 x i16> argument and stores the result to %p.
; MIN16 expects two scalar i16 stores (lane 0 extracted from %a, lane 1 is %b);
; MIN32 expects the vector insertelement and the <2 x i16> store to be kept.
600 define void @insertelement_v2i16(ptr %p, <2 x i16> %a, i16 %b) {
601 ; MIN16-LABEL: @insertelement_v2i16(
602 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
603 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x i16> [[A:%.*]], i64 0
604 ; MIN16-NEXT: store i16 [[A_I0]], ptr [[P]], align 4
605 ; MIN16-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 2
606 ; MIN16-NEXT: ret void
608 ; MIN32-LABEL: @insertelement_v2i16(
609 ; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A:%.*]], i16 [[B:%.*]], i64 1
610 ; MIN32-NEXT: store <2 x i16> [[R]], ptr [[P:%.*]], align 4
611 ; MIN32-NEXT: ret void
613 %r = insertelement <2 x i16> %a, i16 %b, i64 1
614 store <2 x i16> %r, ptr %p
; Inserts %b into lane 2 of a <3 x i16> argument and stores the result to %p.
; MIN16 expects three scalar i16 stores; MIN32 expects a <2 x i16> store of
; lanes 0-1 (taken from %a by shufflevector) followed by a scalar store of %b.
618 define void @insertelement_v3i16(ptr %p, <3 x i16> %a, i16 %b) {
619 ; MIN16-LABEL: @insertelement_v3i16(
620 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
621 ; MIN16-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
622 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x i16> [[A:%.*]], i64 0
623 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x i16> [[A]], i64 1
624 ; MIN16-NEXT: store i16 [[A_I0]], ptr [[P]], align 8
625 ; MIN16-NEXT: store i16 [[A_I1]], ptr [[P_I1]], align 2
626 ; MIN16-NEXT: store i16 [[B:%.*]], ptr [[P_I2]], align 4
627 ; MIN16-NEXT: ret void
629 ; MIN32-LABEL: @insertelement_v3i16(
630 ; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1
631 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x i16> [[A:%.*]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
632 ; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8
633 ; MIN32-NEXT: store i16 [[B:%.*]], ptr [[P_I1]], align 4
634 ; MIN32-NEXT: ret void
636 %r = insertelement <3 x i16> %a, i16 %b, i64 2
637 store <3 x i16> %r, ptr %p
; Inserts %b into lane 3 of a <4 x i16> argument and stores the result to %p.
; MIN16 expects four scalar i16 stores; MIN32 expects two <2 x i16> stores,
; where the upper half is lanes 2-3 of %a with %b inserted at index 1.
641 define void @insertelement_v4i16(ptr %p, <4 x i16> %a, i16 %b) {
642 ; MIN16-LABEL: @insertelement_v4i16(
643 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr i16, ptr [[P:%.*]], i32 1
644 ; MIN16-NEXT: [[P_I2:%.*]] = getelementptr i16, ptr [[P]], i32 2
645 ; MIN16-NEXT: [[P_I3:%.*]] = getelementptr i16, ptr [[P]], i32 3
646 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x i16> [[A:%.*]], i64 0
647 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x i16> [[A]], i64 1
648 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x i16> [[A]], i64 2
649 ; MIN16-NEXT: store i16 [[A_I0]], ptr [[P]], align 8
650 ; MIN16-NEXT: store i16 [[A_I1]], ptr [[P_I1]], align 2
651 ; MIN16-NEXT: store i16 [[A_I2]], ptr [[P_I2]], align 4
652 ; MIN16-NEXT: store i16 [[B:%.*]], ptr [[P_I3]], align 2
653 ; MIN16-NEXT: ret void
655 ; MIN32-LABEL: @insertelement_v4i16(
656 ; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x i16>, ptr [[P:%.*]], i32 1
657 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x i16> [[A:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
658 ; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
659 ; MIN32-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1
660 ; MIN32-NEXT: store <2 x i16> [[A_I0]], ptr [[P]], align 8
661 ; MIN32-NEXT: store <2 x i16> [[TMP1]], ptr [[P_I1]], align 4
662 ; MIN32-NEXT: ret void
664 %r = insertelement <4 x i16> %a, i16 %b, i64 3
665 store <4 x i16> %r, ptr %p
; Loads a <2 x i16> from %pa, replaces lane 1 with %b, and returns the vector.
; MIN16 expects only lane 0 to be loaded (as a scalar i16) and the result to be
; rebuilt with insertelement; MIN32 expects the vector load and insert kept.
669 define <2 x i16> @load_insertelement_v2i16(ptr %pa, i16 %b) {
670 ; MIN16-LABEL: @load_insertelement_v2i16(
671 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 4
672 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x i16> poison, i16 [[A_I0]], i64 0
673 ; MIN16-NEXT: [[R:%.*]] = insertelement <2 x i16> [[R_UPTO0]], i16 [[B:%.*]], i64 1
674 ; MIN16-NEXT: ret <2 x i16> [[R]]
676 ; MIN32-LABEL: @load_insertelement_v2i16(
677 ; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 4
678 ; MIN32-NEXT: [[R:%.*]] = insertelement <2 x i16> [[A]], i16 [[B:%.*]], i64 1
679 ; MIN32-NEXT: ret <2 x i16> [[R]]
681 %a = load <2 x i16>, ptr %pa
682 %r = insertelement <2 x i16> %a, i16 %b, i64 1
; Loads a <3 x i16> from %pa, replaces lane 2 with %b, and returns the vector.
; MIN16 expects scalar i16 loads of lanes 0 and 1 only; MIN32 expects a single
; <2 x i16> load for lanes 0-1, widened to <3 x i16> before %b is inserted.
686 define <3 x i16> @load_insertelement_v3i16(ptr %pa, i16 %b) {
687 ; MIN16-LABEL: @load_insertelement_v3i16(
688 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
689 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
690 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
691 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x i16> poison, i16 [[A_I0]], i64 0
692 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1
693 ; MIN16-NEXT: [[R:%.*]] = insertelement <3 x i16> [[R_UPTO1]], i16 [[B:%.*]], i64 2
694 ; MIN16-NEXT: ret <3 x i16> [[R]]
696 ; MIN32-LABEL: @load_insertelement_v3i16(
697 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
698 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
699 ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x i16> [[TMP1]], i16 [[B:%.*]], i64 2
700 ; MIN32-NEXT: ret <3 x i16> [[R]]
702 %a = load <3 x i16>, ptr %pa
703 %r = insertelement <3 x i16> %a, i16 %b, i64 2
; Loads a <4 x i16> from %pa, replaces lane 3 with %b, and returns the vector.
; MIN16 expects scalar i16 loads of lanes 0-2 only; MIN32 expects two <2 x i16>
; loads, %b inserted into the upper half, and the halves joined by shuffles.
707 define <4 x i16> @load_insertelement_v4i16(ptr %pa, i16 %b) {
708 ; MIN16-LABEL: @load_insertelement_v4i16(
709 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA:%.*]], align 8
710 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
711 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
712 ; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
713 ; MIN16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
714 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x i16> poison, i16 [[A_I0]], i64 0
715 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x i16> [[R_UPTO0]], i16 [[A_I1]], i64 1
716 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x i16> [[R_UPTO1]], i16 [[A_I2]], i64 2
717 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x i16> [[R_UPTO2]], i16 [[B:%.*]], i64 3
718 ; MIN16-NEXT: ret <4 x i16> [[R]]
720 ; MIN32-LABEL: @load_insertelement_v4i16(
721 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
722 ; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
723 ; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
724 ; MIN32-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> [[A_I1]], i16 [[B:%.*]], i64 1
725 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
726 ; MIN32-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
727 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
728 ; MIN32-NEXT: ret <4 x i16> [[R]]
730 %a = load <4 x i16>, ptr %pa
731 %r = insertelement <4 x i16> %a, i16 %b, i64 3
; Concatenates two loaded <2 x i16> vectors into a <4 x i16> with shufflevector
; and stores the result to %pa.
; MIN16 expects scalar i16 loads and four scalar stores with no shuffle left;
; MIN32 expects the vector loads and the widening shuffle kept, with the store
; split into two <2 x i16> halves.
735 define void @shufflevector_grow(ptr %pa, ptr %pb) {
736 ; MIN16-LABEL: @shufflevector_grow(
737 ; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 2
738 ; MIN16-NEXT: [[PA_I3:%.*]] = getelementptr i16, ptr [[PA]], i32 3
739 ; MIN16-NEXT: [[PB_I1:%.*]] = getelementptr i16, ptr [[PB:%.*]], i32 1
740 ; MIN16-NEXT: [[A_I0:%.*]] = load i16, ptr [[PA]], align 4
741 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA]], i32 1
742 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
743 ; MIN16-NEXT: [[B_I0:%.*]] = load i16, ptr [[PB]], align 4
744 ; MIN16-NEXT: [[B_I1:%.*]] = load i16, ptr [[PB_I1]], align 2
745 ; MIN16-NEXT: store i16 [[A_I0]], ptr [[PA]], align 8
746 ; MIN16-NEXT: store i16 [[A_I1]], ptr [[PA_I1]], align 2
747 ; MIN16-NEXT: store i16 [[B_I0]], ptr [[PA_I2]], align 4
748 ; MIN16-NEXT: store i16 [[B_I1]], ptr [[PA_I3]], align 2
749 ; MIN16-NEXT: ret void
751 ; MIN32-LABEL: @shufflevector_grow(
752 ; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA:%.*]], i32 1
753 ; MIN32-NEXT: [[A:%.*]] = load <2 x i16>, ptr [[PA]], align 4
754 ; MIN32-NEXT: [[B:%.*]] = load <2 x i16>, ptr [[PB:%.*]], align 4
755 ; MIN32-NEXT: [[R:%.*]] = shufflevector <2 x i16> [[A]], <2 x i16> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
756 ; MIN32-NEXT: [[R_I0:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
757 ; MIN32-NEXT: store <2 x i16> [[R_I0]], ptr [[PA]], align 8
758 ; MIN32-NEXT: [[R_I1:%.*]] = shufflevector <4 x i16> [[R]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
759 ; MIN32-NEXT: store <2 x i16> [[R_I1]], ptr [[PA_I1]], align 4
760 ; MIN32-NEXT: ret void
762 %a = load <2 x i16>, ptr %pa
763 %b = load <2 x i16>, ptr %pb
764 %r = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
765 store <4 x i16> %r, ptr %pa
; Loads a <4 x i16>, selects lanes 1 and 2 with shufflevector, and stores the
; resulting <2 x i16> back to %pa.
; MIN16 expects only lanes 1 and 2 to be loaded and stored as scalars;
; MIN32 expects the load split into two <2 x i16> halves, reassembled into a
; <4 x i16>, and the narrowing shuffle plus the <2 x i16> store kept.
769 define void @shufflevector_shrink(ptr %pa) {
770 ; MIN16-LABEL: @shufflevector_shrink(
771 ; MIN16-NEXT: [[PA_I1:%.*]] = getelementptr i16, ptr [[PA:%.*]], i32 1
772 ; MIN16-NEXT: [[A_I1:%.*]] = load i16, ptr [[PA_I1]], align 2
773 ; MIN16-NEXT: [[PA_I2:%.*]] = getelementptr i16, ptr [[PA]], i32 2
774 ; MIN16-NEXT: [[A_I2:%.*]] = load i16, ptr [[PA_I2]], align 4
775 ; MIN16-NEXT: store i16 [[A_I1]], ptr [[PA]], align 4
776 ; MIN16-NEXT: store i16 [[A_I2]], ptr [[PA_I1]], align 2
777 ; MIN16-NEXT: ret void
779 ; MIN32-LABEL: @shufflevector_shrink(
780 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x i16>, ptr [[PA:%.*]], align 8
781 ; MIN32-NEXT: [[PA_I1:%.*]] = getelementptr <2 x i16>, ptr [[PA]], i32 1
782 ; MIN32-NEXT: [[A_I1:%.*]] = load <2 x i16>, ptr [[PA_I1]], align 4
783 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[A_I0]], <2 x i16> [[A_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
784 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[A_I1]], <2 x i16> [[A_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
785 ; MIN32-NEXT: [[A:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
786 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> poison, <2 x i32> <i32 1, i32 2>
787 ; MIN32-NEXT: store <2 x i16> [[R]], ptr [[PA]], align 4
788 ; MIN32-NEXT: ret void
790 %a = load <4 x i16>, ptr %pa
791 %r = shufflevector <4 x i16> %a, <4 x i16> poison, <2 x i32> <i32 1, i32 2>
792 store <2 x i16> %r, ptr %pa
; Loop that accumulates <2 x half> values loaded from %base into a vector phi
; using fadd, then stores the final sum to %base.
; MIN16 expects the phi, load, fadd and store split into two half-typed scalar
; chains; MIN32 expects every <2 x half> operation to be left as a vector.
796 define void @phi_v2f16(ptr %base, i64 %bound) {
797 ; MIN16-LABEL: @phi_v2f16(
799 ; MIN16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
800 ; MIN16-NEXT: br label [[LOOP:%.*]]
802 ; MIN16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
803 ; MIN16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
804 ; MIN16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
805 ; MIN16-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE]], i64 [[IDX]]
806 ; MIN16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2
807 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
808 ; MIN16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
809 ; MIN16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
810 ; MIN16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
811 ; MIN16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
812 ; MIN16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
813 ; MIN16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
815 ; MIN16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 4
816 ; MIN16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
817 ; MIN16-NEXT: ret void
819 ; MIN32-LABEL: @phi_v2f16(
821 ; MIN32-NEXT: br label [[LOOP:%.*]]
823 ; MIN32-NEXT: [[X:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ]
824 ; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
825 ; MIN32-NEXT: [[P:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i64 [[IDX]]
826 ; MIN32-NEXT: [[A:%.*]] = load <2 x half>, ptr [[P]], align 2
827 ; MIN32-NEXT: [[X_NEXT]] = fadd <2 x half> [[X]], [[A]]
828 ; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
829 ; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
830 ; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
832 ; MIN32-NEXT: store <2 x half> [[X_NEXT]], ptr [[BASE]], align 4
833 ; MIN32-NEXT: ret void
839 %x = phi <2 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ]
840 %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
841 %p = getelementptr <2 x half>, ptr %base, i64 %idx
842 %a = load <2 x half>, ptr %p, align 2
843 %x.next = fadd <2 x half> %x, %a
844 %idx.next = add i64 %idx, 1
845 %cc = icmp ult i64 %idx.next, %bound
846 br i1 %cc, label %loop, label %end
849 store <2 x half> %x.next, ptr %base
; Loop that accumulates <3 x half> values loaded from %base into a vector phi
; using fadd, then stores the final sum to %base.
; MIN16 expects three half-typed scalar chains; MIN32 expects the vector to be
; split into a <2 x half> part (lanes 0-1) and a scalar half remainder.
853 define void @phi_v3f16(ptr %base, i64 %bound) {
854 ; MIN16-LABEL: @phi_v3f16(
856 ; MIN16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
857 ; MIN16-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2
858 ; MIN16-NEXT: br label [[LOOP:%.*]]
860 ; MIN16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
861 ; MIN16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
862 ; MIN16-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ]
863 ; MIN16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
864 ; MIN16-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]]
865 ; MIN16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2
866 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
867 ; MIN16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
868 ; MIN16-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2
869 ; MIN16-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2
870 ; MIN16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
871 ; MIN16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
872 ; MIN16-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]]
873 ; MIN16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
874 ; MIN16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
875 ; MIN16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
877 ; MIN16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8
878 ; MIN16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
879 ; MIN16-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4
880 ; MIN16-NEXT: ret void
882 ; MIN32-LABEL: @phi_v3f16(
884 ; MIN32-NEXT: [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1
885 ; MIN32-NEXT: br label [[LOOP:%.*]]
887 ; MIN32-NEXT: [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
888 ; MIN32-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
889 ; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
890 ; MIN32-NEXT: [[P:%.*]] = getelementptr <3 x half>, ptr [[BASE]], i64 [[IDX]]
891 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2
892 ; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1
893 ; MIN32-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
894 ; MIN32-NEXT: [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]]
895 ; MIN32-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
896 ; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
897 ; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
898 ; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
900 ; MIN32-NEXT: store <2 x half> [[X_NEXT_I0]], ptr [[BASE]], align 8
901 ; MIN32-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
902 ; MIN32-NEXT: ret void
908 %x = phi <3 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ]
909 %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
910 %p = getelementptr <3 x half>, ptr %base, i64 %idx
911 %a = load <3 x half>, ptr %p, align 2
912 %x.next = fadd <3 x half> %x, %a
913 %idx.next = add i64 %idx, 1
914 %cc = icmp ult i64 %idx.next, %bound
915 br i1 %cc, label %loop, label %end
918 store <3 x half> %x.next, ptr %base
; Loop that accumulates <4 x half> values loaded from %base into a vector phi
; using fadd, then stores the final sum to %base.
; MIN16 expects four half-typed scalar chains; MIN32 expects the vector to be
; split into two <2 x half> chains (lanes 0-1 and lanes 2-3).
922 define void @phi_v4f16(ptr %base, i64 %bound) {
923 ; MIN16-LABEL: @phi_v4f16(
925 ; MIN16-NEXT: [[BASE_I1:%.*]] = getelementptr half, ptr [[BASE:%.*]], i32 1
926 ; MIN16-NEXT: [[BASE_I2:%.*]] = getelementptr half, ptr [[BASE]], i32 2
927 ; MIN16-NEXT: [[BASE_I3:%.*]] = getelementptr half, ptr [[BASE]], i32 3
928 ; MIN16-NEXT: br label [[LOOP:%.*]]
930 ; MIN16-NEXT: [[X_I0:%.*]] = phi half [ 0xH0000, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
931 ; MIN16-NEXT: [[X_I1:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
932 ; MIN16-NEXT: [[X_I2:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I2:%.*]], [[LOOP]] ]
933 ; MIN16-NEXT: [[X_I3:%.*]] = phi half [ 0xH0000, [[ENTRY]] ], [ [[X_NEXT_I3:%.*]], [[LOOP]] ]
934 ; MIN16-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
935 ; MIN16-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]]
936 ; MIN16-NEXT: [[A_I0:%.*]] = load half, ptr [[P]], align 2
937 ; MIN16-NEXT: [[P_I1:%.*]] = getelementptr half, ptr [[P]], i32 1
938 ; MIN16-NEXT: [[A_I1:%.*]] = load half, ptr [[P_I1]], align 2
939 ; MIN16-NEXT: [[P_I2:%.*]] = getelementptr half, ptr [[P]], i32 2
940 ; MIN16-NEXT: [[A_I2:%.*]] = load half, ptr [[P_I2]], align 2
941 ; MIN16-NEXT: [[P_I3:%.*]] = getelementptr half, ptr [[P]], i32 3
942 ; MIN16-NEXT: [[A_I3:%.*]] = load half, ptr [[P_I3]], align 2
943 ; MIN16-NEXT: [[X_NEXT_I0]] = fadd half [[X_I0]], [[A_I0]]
944 ; MIN16-NEXT: [[X_NEXT_I1]] = fadd half [[X_I1]], [[A_I1]]
945 ; MIN16-NEXT: [[X_NEXT_I2]] = fadd half [[X_I2]], [[A_I2]]
946 ; MIN16-NEXT: [[X_NEXT_I3]] = fadd half [[X_I3]], [[A_I3]]
947 ; MIN16-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
948 ; MIN16-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
949 ; MIN16-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
951 ; MIN16-NEXT: store half [[X_NEXT_I0]], ptr [[BASE]], align 8
952 ; MIN16-NEXT: store half [[X_NEXT_I1]], ptr [[BASE_I1]], align 2
953 ; MIN16-NEXT: store half [[X_NEXT_I2]], ptr [[BASE_I2]], align 4
954 ; MIN16-NEXT: store half [[X_NEXT_I3]], ptr [[BASE_I3]], align 2
955 ; MIN16-NEXT: ret void
957 ; MIN32-LABEL: @phi_v4f16(
959 ; MIN32-NEXT: [[BASE_I1:%.*]] = getelementptr <2 x half>, ptr [[BASE:%.*]], i32 1
960 ; MIN32-NEXT: br label [[LOOP:%.*]]
962 ; MIN32-NEXT: [[X_I0:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[X_NEXT_I0:%.*]], [[LOOP]] ]
963 ; MIN32-NEXT: [[X_I1:%.*]] = phi <2 x half> [ zeroinitializer, [[ENTRY]] ], [ [[X_NEXT_I1:%.*]], [[LOOP]] ]
964 ; MIN32-NEXT: [[IDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
965 ; MIN32-NEXT: [[P:%.*]] = getelementptr <4 x half>, ptr [[BASE]], i64 [[IDX]]
966 ; MIN32-NEXT: [[A_I0:%.*]] = load <2 x half>, ptr [[P]], align 2
967 ; MIN32-NEXT: [[P_I1:%.*]] = getelementptr <2 x half>, ptr [[P]], i32 1
968 ; MIN32-NEXT: [[A_I1:%.*]] = load <2 x half>, ptr [[P_I1]], align 2
969 ; MIN32-NEXT: [[X_NEXT_I0]] = fadd <2 x half> [[X_I0]], [[A_I0]]
970 ; MIN32-NEXT: [[X_NEXT_I1]] = fadd <2 x half> [[X_I1]], [[A_I1]]
971 ; MIN32-NEXT: [[IDX_NEXT]] = add i64 [[IDX]], 1
972 ; MIN32-NEXT: [[CC:%.*]] = icmp ult i64 [[IDX_NEXT]], [[BOUND:%.*]]
973 ; MIN32-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]]
975 ; MIN32-NEXT: store <2 x half> [[X_NEXT_I0]], ptr [[BASE]], align 8
976 ; MIN32-NEXT: store <2 x half> [[X_NEXT_I1]], ptr [[BASE_I1]], align 4
977 ; MIN32-NEXT: ret void
983 %x = phi <4 x half> [ zeroinitializer, %entry ], [ %x.next, %loop ]
984 %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
985 %p = getelementptr <4 x half>, ptr %base, i64 %idx
986 %a = load <4 x half>, ptr %p, align 2
987 %x.next = fadd <4 x half> %x, %a
988 %idx.next = add i64 %idx, 1
989 %cc = icmp ult i64 %idx.next, %bound
990 br i1 %cc, label %loop, label %end
993 store <4 x half> %x.next, ptr %base
; Calls the llvm.minnum intrinsic on <2 x half> operands.
; MIN16 expects two scalar @llvm.minnum.f16 calls whose results are rebuilt
; into a <2 x half>; MIN32 expects the vector intrinsic call to be kept.
997 define <2 x half> @call_v2f16(<2 x half> %a, <2 x half> %b) {
998 ; MIN16-LABEL: @call_v2f16(
999 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <2 x half> [[A:%.*]], i64 0
1000 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <2 x half> [[B:%.*]], i64 0
1001 ; MIN16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
1002 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <2 x half> [[A]], i64 1
1003 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <2 x half> [[B]], i64 1
1004 ; MIN16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
1005 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <2 x half> poison, half [[R_I0]], i64 0
1006 ; MIN16-NEXT: [[R:%.*]] = insertelement <2 x half> [[R_UPTO0]], half [[R_I1]], i64 1
1007 ; MIN16-NEXT: ret <2 x half> [[R]]
1009 ; MIN32-LABEL: @call_v2f16(
1010 ; MIN32-NEXT: [[R:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A:%.*]], <2 x half> [[B:%.*]])
1011 ; MIN32-NEXT: ret <2 x half> [[R]]
1013 %r = call <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
; Calls the llvm.minnum intrinsic on <3 x half> operands.
; MIN16 expects three scalar @llvm.minnum.f16 calls; MIN32 expects one
; @llvm.minnum.v2f16 call for lanes 0-1 plus one scalar call for lane 2, with
; the results recombined into a <3 x half>.
1017 define <3 x half> @call_v3f16(<3 x half> %a, <3 x half> %b) {
1018 ; MIN16-LABEL: @call_v3f16(
1019 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <3 x half> [[A:%.*]], i64 0
1020 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <3 x half> [[B:%.*]], i64 0
1021 ; MIN16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
1022 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 1
1023 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 1
1024 ; MIN16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
1025 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <3 x half> [[A]], i64 2
1026 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <3 x half> [[B]], i64 2
1027 ; MIN16-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]])
1028 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <3 x half> poison, half [[R_I0]], i64 0
1029 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <3 x half> [[R_UPTO0]], half [[R_I1]], i64 1
1030 ; MIN16-NEXT: [[R:%.*]] = insertelement <3 x half> [[R_UPTO1]], half [[R_I2]], i64 2
1031 ; MIN16-NEXT: ret <3 x half> [[R]]
1033 ; MIN32-LABEL: @call_v3f16(
1034 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <3 x half> [[A:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
1035 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <3 x half> [[B:%.*]], <3 x half> poison, <2 x i32> <i32 0, i32 1>
1036 ; MIN32-NEXT: [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]])
1037 ; MIN32-NEXT: [[A_I1:%.*]] = extractelement <3 x half> [[A]], i64 2
1038 ; MIN32-NEXT: [[B_I1:%.*]] = extractelement <3 x half> [[B]], i64 2
1039 ; MIN32-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
1040 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <3 x i32> <i32 0, i32 1, i32 poison>
1041 ; MIN32-NEXT: [[R:%.*]] = insertelement <3 x half> [[TMP1]], half [[R_I1]], i64 2
1042 ; MIN32-NEXT: ret <3 x half> [[R]]
1044 %r = call <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b)
; Calls the llvm.minnum intrinsic on <4 x half> operands.
; MIN16 expects four scalar @llvm.minnum.f16 calls; MIN32 expects two
; @llvm.minnum.v2f16 calls (lanes 0-1 and lanes 2-3) whose results are
; concatenated back into a <4 x half> with shufflevector.
1048 define <4 x half> @call_v4f16(<4 x half> %a, <4 x half> %b) {
1049 ; MIN16-LABEL: @call_v4f16(
1050 ; MIN16-NEXT: [[A_I0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
1051 ; MIN16-NEXT: [[B_I0:%.*]] = extractelement <4 x half> [[B:%.*]], i64 0
1052 ; MIN16-NEXT: [[R_I0:%.*]] = call half @llvm.minnum.f16(half [[A_I0]], half [[B_I0]])
1053 ; MIN16-NEXT: [[A_I1:%.*]] = extractelement <4 x half> [[A]], i64 1
1054 ; MIN16-NEXT: [[B_I1:%.*]] = extractelement <4 x half> [[B]], i64 1
1055 ; MIN16-NEXT: [[R_I1:%.*]] = call half @llvm.minnum.f16(half [[A_I1]], half [[B_I1]])
1056 ; MIN16-NEXT: [[A_I2:%.*]] = extractelement <4 x half> [[A]], i64 2
1057 ; MIN16-NEXT: [[B_I2:%.*]] = extractelement <4 x half> [[B]], i64 2
1058 ; MIN16-NEXT: [[R_I2:%.*]] = call half @llvm.minnum.f16(half [[A_I2]], half [[B_I2]])
1059 ; MIN16-NEXT: [[A_I3:%.*]] = extractelement <4 x half> [[A]], i64 3
1060 ; MIN16-NEXT: [[B_I3:%.*]] = extractelement <4 x half> [[B]], i64 3
1061 ; MIN16-NEXT: [[R_I3:%.*]] = call half @llvm.minnum.f16(half [[A_I3]], half [[B_I3]])
1062 ; MIN16-NEXT: [[R_UPTO0:%.*]] = insertelement <4 x half> poison, half [[R_I0]], i64 0
1063 ; MIN16-NEXT: [[R_UPTO1:%.*]] = insertelement <4 x half> [[R_UPTO0]], half [[R_I1]], i64 1
1064 ; MIN16-NEXT: [[R_UPTO2:%.*]] = insertelement <4 x half> [[R_UPTO1]], half [[R_I2]], i64 2
1065 ; MIN16-NEXT: [[R:%.*]] = insertelement <4 x half> [[R_UPTO2]], half [[R_I3]], i64 3
1066 ; MIN16-NEXT: ret <4 x half> [[R]]
1068 ; MIN32-LABEL: @call_v4f16(
1069 ; MIN32-NEXT: [[A_I0:%.*]] = shufflevector <4 x half> [[A:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
1070 ; MIN32-NEXT: [[B_I0:%.*]] = shufflevector <4 x half> [[B:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
1071 ; MIN32-NEXT: [[R_I0:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I0]], <2 x half> [[B_I0]])
1072 ; MIN32-NEXT: [[A_I1:%.*]] = shufflevector <4 x half> [[A]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
1073 ; MIN32-NEXT: [[B_I1:%.*]] = shufflevector <4 x half> [[B]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
1074 ; MIN32-NEXT: [[R_I1:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[A_I1]], <2 x half> [[B_I1]])
1075 ; MIN32-NEXT: [[TMP1:%.*]] = shufflevector <2 x half> [[R_I0]], <2 x half> [[R_I0]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1076 ; MIN32-NEXT: [[TMP2:%.*]] = shufflevector <2 x half> [[R_I1]], <2 x half> [[R_I1]], <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1077 ; MIN32-NEXT: [[R:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1078 ; MIN32-NEXT: ret <4 x half> [[R]]
1080 %r = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
; Declarations of the vector llvm.minnum intrinsics called by the
; call_v2f16, call_v3f16 and call_v4f16 tests.
1084 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
1085 declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>)
1086 declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)