1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
6 define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
7 ; GFX7-LABEL: @uadd_sat_v2i16(
9 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
10 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
11 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
12 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
13 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
14 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
15 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
16 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
17 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
19 ; GFX8-LABEL: @uadd_sat_v2i16(
21 ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
22 ; GFX8-NEXT: ret <2 x i16> [[TMP0]]
24 ; GFX9-LABEL: @uadd_sat_v2i16(
26 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
27 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
30 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
31 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
32 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
33 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
34 %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
35 %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
36 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
37 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
41 define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
42 ; GFX7-LABEL: @usub_sat_v2i16(
44 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
45 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
46 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
47 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
48 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
49 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
50 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
51 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
52 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
54 ; GFX8-LABEL: @usub_sat_v2i16(
56 ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
57 ; GFX8-NEXT: ret <2 x i16> [[TMP0]]
59 ; GFX9-LABEL: @usub_sat_v2i16(
61 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
62 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
65 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
66 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
67 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
68 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
69 %add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
70 %add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
71 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
72 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
76 define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
77 ; GFX7-LABEL: @sadd_sat_v2i16(
79 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
80 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
81 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
82 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
83 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
84 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
85 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
86 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
87 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
89 ; GFX8-LABEL: @sadd_sat_v2i16(
91 ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
92 ; GFX8-NEXT: ret <2 x i16> [[TMP0]]
94 ; GFX9-LABEL: @sadd_sat_v2i16(
96 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
97 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
100 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
101 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
102 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
103 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
104 %add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
105 %add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
106 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
107 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
111 define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
112 ; GFX7-LABEL: @ssub_sat_v2i16(
114 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
115 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
116 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
117 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
118 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
119 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
120 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
121 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
122 ; GFX7-NEXT: ret <2 x i16> [[INS_1]]
124 ; GFX8-LABEL: @ssub_sat_v2i16(
126 ; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
127 ; GFX8-NEXT: ret <2 x i16> [[TMP0]]
129 ; GFX9-LABEL: @ssub_sat_v2i16(
131 ; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
132 ; GFX9-NEXT: ret <2 x i16> [[TMP0]]
135 %arg0.0 = extractelement <2 x i16> %arg0, i64 0
136 %arg0.1 = extractelement <2 x i16> %arg0, i64 1
137 %arg1.0 = extractelement <2 x i16> %arg1, i64 0
138 %arg1.1 = extractelement <2 x i16> %arg1, i64 1
139 %add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
140 %add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
141 %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
142 %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
146 define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
147 ; GCN-LABEL: @uadd_sat_v2i32(
149 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
150 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
151 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
152 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
153 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
154 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
155 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
156 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
157 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
160 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
161 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
162 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
163 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
164 %add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
165 %add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
166 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
167 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
171 define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
172 ; GCN-LABEL: @usub_sat_v2i32(
174 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
175 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
176 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
177 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
178 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
179 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
180 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
181 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
182 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
185 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
186 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
187 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
188 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
189 %add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
190 %add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
191 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
192 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
196 define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
197 ; GCN-LABEL: @sadd_sat_v2i32(
199 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
200 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
201 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
202 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
203 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
204 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
205 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
206 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
207 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
210 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
211 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
212 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
213 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
214 %add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
215 %add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
216 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
217 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
221 define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
222 ; GCN-LABEL: @ssub_sat_v2i32(
224 ; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
225 ; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
226 ; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
227 ; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
228 ; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
229 ; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
230 ; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
231 ; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
232 ; GCN-NEXT: ret <2 x i32> [[INS_1]]
235 %arg0.0 = extractelement <2 x i32> %arg0, i64 0
236 %arg0.1 = extractelement <2 x i32> %arg0, i64 1
237 %arg1.0 = extractelement <2 x i32> %arg1, i64 0
238 %arg1.1 = extractelement <2 x i32> %arg1, i64 1
239 %add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
240 %add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
241 %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
242 %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
246 define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
247 ; GFX7-LABEL: @uadd_sat_v3i16(
249 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
250 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
251 ; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
252 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
253 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
254 ; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
255 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
256 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
257 ; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
258 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
259 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
260 ; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
261 ; GFX7-NEXT: ret <3 x i16> [[INS_2]]
263 ; GFX8-LABEL: @uadd_sat_v3i16(
265 ; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
266 ; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
267 ; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
268 ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
269 ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
270 ; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
271 ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
272 ; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
273 ; GFX8-NEXT: ret <3 x i16> [[INS_2]]
275 ; GFX9-LABEL: @uadd_sat_v3i16(
277 ; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
278 ; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
279 ; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
280 ; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
281 ; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
282 ; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
283 ; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
284 ; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
285 ; GFX9-NEXT: ret <3 x i16> [[INS_2]]
288 %arg0.0 = extractelement <3 x i16> %arg0, i64 0
289 %arg0.1 = extractelement <3 x i16> %arg0, i64 1
290 %arg0.2 = extractelement <3 x i16> %arg0, i64 2
291 %arg1.0 = extractelement <3 x i16> %arg1, i64 0
292 %arg1.1 = extractelement <3 x i16> %arg1, i64 1
293 %arg1.2 = extractelement <3 x i16> %arg1, i64 2
294 %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
295 %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
296 %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
297 %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
298 %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
299 %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
303 define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
304 ; GFX7-LABEL: @uadd_sat_v4i16(
306 ; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
307 ; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
308 ; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
309 ; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
310 ; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
311 ; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
312 ; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
313 ; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
314 ; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
315 ; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
316 ; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
317 ; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
318 ; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
319 ; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
320 ; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
321 ; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
322 ; GFX7-NEXT: ret <4 x i16> [[INS_3]]
324 ; GFX8-LABEL: @uadd_sat_v4i16(
326 ; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
327 ; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
328 ; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
329 ; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
330 ; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
331 ; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
332 ; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
333 ; GFX8-NEXT: ret <4 x i16> [[INS_31]]
335 ; GFX9-LABEL: @uadd_sat_v4i16(
337 ; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
338 ; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
339 ; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
340 ; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
341 ; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
342 ; GFX9-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
343 ; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
344 ; GFX9-NEXT: ret <4 x i16> [[INS_31]]
347 %arg0.0 = extractelement <4 x i16> %arg0, i64 0
348 %arg0.1 = extractelement <4 x i16> %arg0, i64 1
349 %arg0.2 = extractelement <4 x i16> %arg0, i64 2
350 %arg0.3 = extractelement <4 x i16> %arg0, i64 3
351 %arg1.0 = extractelement <4 x i16> %arg1, i64 0
352 %arg1.1 = extractelement <4 x i16> %arg1, i64 1
353 %arg1.2 = extractelement <4 x i16> %arg1, i64 2
354 %arg1.3 = extractelement <4 x i16> %arg1, i64 3
355 %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
356 %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
357 %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
358 %add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
359 %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
360 %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
361 %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
362 %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
366 declare i16 @llvm.uadd.sat.i16(i16, i16) #0
367 declare i16 @llvm.usub.sat.i16(i16, i16) #0
368 declare i16 @llvm.sadd.sat.i16(i16, i16) #0
369 declare i16 @llvm.ssub.sat.i16(i16, i16) #0
371 declare i32 @llvm.uadd.sat.i32(i32, i32) #0
372 declare i32 @llvm.usub.sat.i32(i32, i32) #0
373 declare i32 @llvm.sadd.sat.i32(i32, i32) #0
374 declare i32 @llvm.ssub.sat.i32(i32, i32) #0
376 attributes #0 = { nounwind readnone speculatable willreturn }