1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
; Lane-wise llvm.umin.i16 over two <2 x i16> arguments, written as scalar
; extractelement / call / insertelement chains for the SLP vectorizer.
; Expected output: the hawaii and fiji runs keep both scalar umin calls,
; while the gfx900 run collapses them into one llvm.umin.v2i16 call.
; Review note - the function name says "uadd_sat" but the body calls umin;
; the name looks inherited from another test (confirm before renaming, since
; the LABEL checks match on it).
6 define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
7 ; GFX7-LABEL: @uadd_sat_v2i16(
9 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
10 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
11 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
12 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
13 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
14 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
15 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
16 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
17 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
19 ; GFX8-LABEL: @uadd_sat_v2i16(
21 ; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
22 ; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
23 ; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
24 ; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
25 ; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
26 ; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
27 ; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
28 ; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
29 ; GFX8-NEXT: ret <2 x i16> [[INS_1]]
31 ; GFX9-LABEL: @uadd_sat_v2i16(
33 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
34 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
37 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
38 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
39 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
40 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
41 %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
42 %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
43 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
44 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
; Lane-wise llvm.umax.i16 over two <2 x i16> arguments, written as scalar
; extractelement / call / insertelement chains for the SLP vectorizer.
; Expected output: the hawaii and fiji runs keep both scalar umax calls,
; while the gfx900 run collapses them into one llvm.umax.v2i16 call.
; Review note - the function name says "usub_sat" but the body calls umax;
; the name looks inherited from another test (confirm before renaming).
48 define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
49 ; GFX7-LABEL: @usub_sat_v2i16(
51 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
52 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
53 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
54 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
55 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
56 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
57 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
58 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
59 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
61 ; GFX8-LABEL: @usub_sat_v2i16(
63 ; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
64 ; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
65 ; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
66 ; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
67 ; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
68 ; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
69 ; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
70 ; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
71 ; GFX8-NEXT: ret <2 x i16> [[INS_1]]
73 ; GFX9-LABEL: @usub_sat_v2i16(
75 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
76 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
79 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
80 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
81 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
82 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
83 %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
84 %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
85 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
86 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
; Lane-wise llvm.smin.i16 over two <2 x i16> arguments, written as scalar
; extractelement / call / insertelement chains for the SLP vectorizer.
; Expected output: the hawaii and fiji runs keep both scalar smin calls,
; while the gfx900 run collapses them into one llvm.smin.v2i16 call.
; Review note - the function name says "sadd_sat" but the body calls smin;
; the name looks inherited from another test (confirm before renaming).
90 define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
91 ; GFX7-LABEL: @sadd_sat_v2i16(
93 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
94 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
95 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
96 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
97 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
98 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
99 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
100 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
101 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
103 ; GFX8-LABEL: @sadd_sat_v2i16(
105 ; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
106 ; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
107 ; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
108 ; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
109 ; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
110 ; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
111 ; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
112 ; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
113 ; GFX8-NEXT: ret <2 x i16> [[INS_1]]
115 ; GFX9-LABEL: @sadd_sat_v2i16(
117 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
118 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
121 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
122 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
123 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
124 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
125 %add.0 = call i16 @llvm.smin.i16(i16 %arg0.0, i16 %arg1.0)
126 %add.1 = call i16 @llvm.smin.i16(i16 %arg0.1, i16 %arg1.1)
127 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
128 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
; Lane-wise llvm.smax.i16 over two <2 x i16> arguments, written as scalar
; extractelement / call / insertelement chains for the SLP vectorizer.
; Expected output: the hawaii and fiji runs keep both scalar smax calls,
; while the gfx900 run collapses them into one llvm.smax.v2i16 call.
; Review note - the function name says "ssub_sat" but the body calls smax;
; the name looks inherited from another test (confirm before renaming).
132 define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
133 ; GFX7-LABEL: @ssub_sat_v2i16(
135 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
136 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
137 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
138 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
139 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
140 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
141 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
142 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
143 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
145 ; GFX8-LABEL: @ssub_sat_v2i16(
147 ; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
148 ; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
149 ; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
150 ; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
151 ; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
152 ; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
153 ; GFX8-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
154 ; GFX8-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
155 ; GFX8-NEXT: ret <2 x i16> [[INS_1]]
157 ; GFX9-LABEL: @ssub_sat_v2i16(
159 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
160 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
163 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
164 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
165 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
166 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
167 %add.0 = call i16 @llvm.smax.i16(i16 %arg0.0, i16 %arg1.0)
168 %add.1 = call i16 @llvm.smax.i16(i16 %arg0.1, i16 %arg1.1)
169 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
170 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
; Lane-wise llvm.umin.i32 over two <2 x i32> arguments, written as scalar
; extractelement / call / insertelement chains.
; Expected output: one shared check block covers all three runs, and it keeps
; both scalar umin calls (no vector call is formed for i32 lanes).
; Review note - the function name says "uadd_sat" but the body calls umin;
; the name looks inherited from another test (confirm before renaming).
174 define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
175 ; GCN-LABEL: @uadd_sat_v2i32(
177 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
178 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
179 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
180 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
181 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
182 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
183 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
184 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
185 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
188 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
189 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
190 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
191 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
192 %add.0 = call i32 @llvm.umin.i32(i32 %arg0.0, i32 %arg1.0)
193 %add.1 = call i32 @llvm.umin.i32(i32 %arg0.1, i32 %arg1.1)
194 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
195 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
; Lane-wise llvm.umax.i32 over two <2 x i32> arguments, written as scalar
; extractelement / call / insertelement chains.
; Expected output: one shared check block covers all three runs, and it keeps
; both scalar umax calls (no vector call is formed for i32 lanes).
; Review note - the function name says "usub_sat" but the body calls umax;
; the name looks inherited from another test (confirm before renaming).
199 define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
200 ; GCN-LABEL: @usub_sat_v2i32(
202 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
203 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
204 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
205 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
206 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
207 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
208 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
209 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
210 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
213 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
214 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
215 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
216 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
217 %add.0 = call i32 @llvm.umax.i32(i32 %arg0.0, i32 %arg1.0)
218 %add.1 = call i32 @llvm.umax.i32(i32 %arg0.1, i32 %arg1.1)
219 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
220 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
; Lane-wise llvm.smin.i32 over two <2 x i32> arguments, written as scalar
; extractelement / call / insertelement chains.
; Expected output: one shared check block covers all three runs, and it keeps
; both scalar smin calls (no vector call is formed for i32 lanes).
; Review note - the function name says "sadd_sat" but the body calls smin;
; the name looks inherited from another test (confirm before renaming).
224 define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
225 ; GCN-LABEL: @sadd_sat_v2i32(
227 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
228 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
229 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
230 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
231 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
232 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
233 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
234 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
235 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
238 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
239 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
240 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
241 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
242 %add.0 = call i32 @llvm.smin.i32(i32 %arg0.0, i32 %arg1.0)
243 %add.1 = call i32 @llvm.smin.i32(i32 %arg0.1, i32 %arg1.1)
244 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
245 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
; Lane-wise llvm.smax.i32 over two <2 x i32> arguments, written as scalar
; extractelement / call / insertelement chains.
; Expected output: one shared check block covers all three runs, and it keeps
; both scalar smax calls (no vector call is formed for i32 lanes).
; Review note - the function name says "ssub_sat" but the body calls smax;
; the name looks inherited from another test (confirm before renaming).
249 define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
250 ; GCN-LABEL: @ssub_sat_v2i32(
252 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
253 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
254 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
255 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
256 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
257 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
258 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
259 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
260 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
263 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
264 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
265 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
266 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
267 %add.0 = call i32 @llvm.smax.i32(i32 %arg0.0, i32 %arg1.0)
268 %add.1 = call i32 @llvm.smax.i32(i32 %arg0.1, i32 %arg1.1)
269 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
270 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
; Lane-wise llvm.umin.i16 over two <3 x i16> arguments (an odd lane count),
; written as scalar extractelement / call / insertelement chains.
; Expected output: the hawaii and fiji runs keep all three scalar umin calls.
; The gfx900 run expects an llvm.umin.v3i16 call on the whole arguments, with
; lane 2 still computed by a scalar umin call and inserted into that result.
; Review note - the function name says "uadd_sat" but the body calls umin;
; the name looks inherited from another test (confirm before renaming).
274 define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
275 ; GFX7-LABEL: @uadd_sat_v3i16(
277 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
278 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
279 ; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
280 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
281 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
282 ; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
283 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
284 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
285 ; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
286 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
287 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
288 ; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
289 ; GFX7-NEXT: ret <3 x i16> [[INS_2]]
291 ; GFX8-LABEL: @uadd_sat_v3i16(
293 ; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
294 ; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
295 ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
296 ; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
297 ; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
298 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
299 ; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
300 ; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
301 ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302 ; GFX8-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
303 ; GFX8-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
304 ; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
305 ; GFX8-NEXT: ret <3 x i16> [[INS_2]]
307 ; GFX9-LABEL: @uadd_sat_v3i16(
309 ; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
310 ; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
311 ; GFX9-NEXT: [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
312 ; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
313 ; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
314 ; GFX9-NEXT: ret <3 x i16> [[INS_2]]
317 %arg0.0 = extractelement <3 x i16> %arg0, i64 0
318 %arg0.1 = extractelement <3 x i16> %arg0, i64 1
319 %arg0.2 = extractelement <3 x i16> %arg0, i64 2
320 %arg1.0 = extractelement <3 x i16> %arg1, i64 0
321 %arg1.1 = extractelement <3 x i16> %arg1, i64 1
322 %arg1.2 = extractelement <3 x i16> %arg1, i64 2
323 %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
324 %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
325 %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
326 %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
327 %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
328 %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
332 define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
333 ; GFX7-LABEL: @uadd_sat_v4i16(
335 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
336 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
337 ; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
338 ; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
339 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
340 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
341 ; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
342 ; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
343 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
344 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
345 ; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
346 ; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
347 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
348 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
349 ; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
350 ; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
351 ; GFX7-NEXT: ret <4 x i16> [[INS_3]]
353 ; GFX8-LABEL: @uadd_sat_v4i16(
355 ; GFX8-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
356 ; GFX8-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
357 ; GFX8-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
358 ; GFX8-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
359 ; GFX8-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
360 ; GFX8-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
361 ; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
362 ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
363 ; GFX8-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
364 ; GFX8-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
365 ; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
366 ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[INS_1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
367 ; GFX8-NEXT: ret <4 x i16> [[INS_31]]
369 ; GFX9-LABEL: @uadd_sat_v4i16(
371 ; GFX9-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
372 ; GFX9-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
373 ; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
374 ; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
375 ; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
376 ; GFX9-NEXT: ret <4 x i16> [[INS_31]]
379 %arg0.0 = extractelement <4 x i16> %arg0, i64 0
380 %arg0.1 = extractelement <4 x i16> %arg0, i64 1
381 %arg0.2 = extractelement <4 x i16> %arg0, i64 2
382 %arg0.3 = extractelement <4 x i16> %arg0, i64 3
383 %arg1.0 = extractelement <4 x i16> %arg1, i64 0
384 %arg1.1 = extractelement <4 x i16> %arg1, i64 1
385 %arg1.2 = extractelement <4 x i16> %arg1, i64 2
386 %arg1.3 = extractelement <4 x i16> %arg1, i64 3
387 %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
388 %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
389 %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
390 %add.3 = call i16 @llvm.umin.i16(i16 %arg0.3, i16 %arg1.3)
391 %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
392 %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
393 %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
394 %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3