1 ;; Copyright (C) 2016-2024 Free Software Foundation, Inc.
3 ;; This file is free software; you can redistribute it and/or modify it under
4 ;; the terms of the GNU General Public License as published by the Free
5 ;; Software Foundation; either version 3 of the License, or (at your option)
8 ;; This file is distributed in the hope that it will be useful, but WITHOUT
9 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 ;; You should have received a copy of the GNU General Public License
14 ;; along with GCC; see the file COPYING3. If not see
15 ;; <http://www.gnu.org/licenses/>.
17 ;; {{{ Vector iterators
18 ; SV iterators include both scalar and vector modes.
20 ; Vector modes for specific types
; QI-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_QI [V2QI V4QI V8QI V16QI V32QI V64QI])
; HI-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_HI [V2HI V4HI V8HI V16HI V32HI V64HI])
; HF-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_HF [V2HF V4HF V8HF V16HF V32HF V64HF])
; SI-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_SI [V2SI V4SI V8SI V16SI V32SI V64SI])
; SF-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_SF [V2SF V4SF V8SF V16SF V32SF V64SF])
; DI-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_DI [V2DI V4DI V8DI V16DI V32DI V64DI])
; DF-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_DF [V2DF V4DF V8DF V16DF V32DF V64DF])
36 ; Vector modes for sub-dword modes
37 (define_mode_iterator V_QIHI
45 ; Vector modes for one vector register
; One row per lane count (2 to 64); each row lists the QI, HI, SI, HF and SF
; element variants.
(define_mode_iterator V_1REG
  [V2QI  V2HI  V2SI  V2HF  V2SF
   V4QI  V4HI  V4SI  V4HF  V4SF
   V8QI  V8HI  V8SI  V8HF  V8SF
   V16QI V16HI V16SI V16HF V16SF
   V32QI V32HI V32SI V32HF V32SF
   V64QI V64HI V64SI V64HF V64SF])
; Same mode list as V_1REG under a second iterator name, so that one pattern
; can range over two such modes independently (see
; vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop).
(define_mode_iterator V_1REG_ALT
  [V2QI  V2HI  V2SI  V2HF  V2SF
   V4QI  V4HI  V4SI  V4HF  V4SF
   V8QI  V8HI  V8SI  V8HF  V8SF
   V16QI V16HI V16SI V16HF V16SF
   V32QI V32HI V32SI V32HF V32SF
   V64QI V64HI V64SI V64HF V64SF])
61 (define_mode_iterator V_INT_1REG
68 (define_mode_iterator V_INT_1REG_ALT
75 (define_mode_iterator V_FP_1REG
83 ; Vector modes for two vector registers
84 (define_mode_iterator V_2REG
91 (define_mode_iterator V_2REG_ALT
99 ; Vector modes for four vector registers
; TI-element vectors at every lane count from 2 to 64.
(define_mode_iterator V_4REG
  [V2TI V4TI V8TI V16TI V32TI V64TI])
; Same mode list as V_4REG under a second iterator name, so that one pattern
; can range over two such modes independently (see
; vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop).
(define_mode_iterator V_4REG_ALT
  [V2TI V4TI V8TI V16TI V32TI V64TI])
103 ; Vector modes with native support
; HI, HF, SI, SF, DI and DF element vectors at each lane count; the QI
; vectors are deliberately left out.
(define_mode_iterator V_noQI
  [V2HI  V2HF  V2SI  V2SF  V2DI  V2DF
   V4HI  V4HF  V4SI  V4SF  V4DI  V4DF
   V8HI  V8HF  V8SI  V8SF  V8DI  V8DF
   V16HI V16HF V16SI V16SF V16DI V16DF
   V32HI V32HF V32SI V32SF V32DI V32DF
   V64HI V64HF V64SI V64SF V64DI V64DF])
; HF, SI, SF, DI and DF element vectors at each lane count; neither the QI
; nor the HI vectors are included (the HF vectors are).
(define_mode_iterator V_noHI
  [V2HF  V2SI  V2SF  V2DI  V2DF
   V4HF  V4SI  V4SF  V4DI  V4DF
   V8HF  V8SI  V8SF  V8DI  V8DF
   V16HF V16SI V16SF V16DI V16DF
   V32HF V32SI V32SF V32DI V32DF
   V64HF V64SI V64SF V64DI V64DF])
119 (define_mode_iterator V_INT_noQI
126 (define_mode_iterator V_INT_noHI
134 (define_mode_iterator SV_SFDF
143 ; All modes in which we want to do more than just moves.
; QI, HI, HF, SI, SF, DI and DF element vectors at each lane count from 2 to
; 64.  The TI vectors are not listed here; they appear in V_MOV.
(define_mode_iterator V_ALL
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
; Same mode list as V_ALL under a second iterator name, available for
; patterns that need to range over two such modes independently.
(define_mode_iterator V_ALL_ALT
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
159 (define_mode_iterator V_INT
163 V16QI V16HI V16SI V16DI
164 V32QI V32HI V32SI V32DI
165 V64QI V64HI V64SI V64DI])
166 (define_mode_iterator V_FP
173 (define_mode_iterator SV_FP
182 ; All modes that need moves, including those without many insns.
; The QI, HI, HF, SI, SF, DI and DF element vectors plus the TI element
; vectors, at each lane count from 2 to 64.
(define_mode_iterator V_MOV
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF  V2TI
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF  V4TI
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF  V8TI
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
; Same mode list as V_MOV under a second iterator name, so that one pattern
; can range over two such modes independently (see
; vec_extract<V_MOV:mode><V_MOV_ALT:mode>).
(define_mode_iterator V_MOV_ALT
  [V2QI  V2HI  V2HF  V2SI  V2SF  V2DI  V2DF  V2TI
   V4QI  V4HI  V4HF  V4SI  V4SF  V4DI  V4DF  V4TI
   V8QI  V8HI  V8HF  V8SI  V8SF  V8DI  V8DF  V8TI
   V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
   V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
   V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
; Lower-case name of the element mode.  Scalar modes map to themselves;
; every vector mode maps to the name of its element mode.  Used to build
; pattern names such as vec_extract<mode><scalar_mode>.
(define_mode_attr scalar_mode
  [(QI    "qi") (HI    "hi") (SI    "si") (TI    "ti")
   (HF    "hf") (SF    "sf") (DI    "di") (DF    "df")
   (V2QI  "qi") (V2HI  "hi") (V2SI  "si") (V2TI  "ti")
   (V2HF  "hf") (V2SF  "sf") (V2DI  "di") (V2DF  "df")
   (V4QI  "qi") (V4HI  "hi") (V4SI  "si") (V4TI  "ti")
   (V4HF  "hf") (V4SF  "sf") (V4DI  "di") (V4DF  "df")
   (V8QI  "qi") (V8HI  "hi") (V8SI  "si") (V8TI  "ti")
   (V8HF  "hf") (V8SF  "sf") (V8DI  "di") (V8DF  "df")
   (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
   (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
   (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
   (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
   (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
; Upper-case counterpart of scalar_mode: the element mode itself, usable
; wherever a mode is expected (e.g. match_operand:<SCALAR_MODE>).
(define_mode_attr SCALAR_MODE
  [(QI    "QI") (HI    "HI") (SI    "SI") (TI    "TI")
   (HF    "HF") (SF    "SF") (DI    "DI") (DF    "DF")
   (V2QI  "QI") (V2HI  "HI") (V2SI  "SI") (V2TI  "TI")
   (V2HF  "HF") (V2SF  "SF") (V2DI  "DI") (V2DF  "DF")
   (V4QI  "QI") (V4HI  "HI") (V4SI  "SI") (V4TI  "TI")
   (V4HF  "HF") (V4SF  "SF") (V4DI  "DI") (V4DF  "DF")
   (V8QI  "QI") (V8HI  "HI") (V8SI  "SI") (V8TI  "TI")
   (V8HF  "HF") (V8SF  "SF") (V8DI  "DI") (V8DF  "DF")
   (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
   (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
   (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
   (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
   (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
; Lower-case name of the SI-element vector that has the same lane count as
; the given mode; scalar modes all map to "si".  Used to build generator
; names such as gen_vec_series<V_MOV:vnsi>.
(define_mode_attr vnsi
  [(QI    "si")    (HI    "si")    (SI    "si")    (TI    "si")
   (HF    "si")    (SF    "si")    (DI    "si")    (DF    "si")
   (V2QI  "v2si")  (V2HI  "v2si")  (V2HF  "v2si")  (V2SI  "v2si")
   (V2SF  "v2si")  (V2DI  "v2si")  (V2DF  "v2si")  (V2TI  "v2si")
   (V4QI  "v4si")  (V4HI  "v4si")  (V4HF  "v4si")  (V4SI  "v4si")
   (V4SF  "v4si")  (V4DI  "v4si")  (V4DF  "v4si")  (V4TI  "v4si")
   (V8QI  "v8si")  (V8HI  "v8si")  (V8HF  "v8si")  (V8SI  "v8si")
   (V8SF  "v8si")  (V8DI  "v8si")  (V8DF  "v8si")  (V8TI  "v8si")
   (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
   (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
   (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
; Upper-case counterpart of vnsi: the SI-element vector mode with the same
; lane count (plain SI for scalar modes), usable as <VnSI>mode.
(define_mode_attr VnSI
  [(QI    "SI")    (HI    "SI")    (SI    "SI")    (TI    "SI")
   (HF    "SI")    (SF    "SI")    (DI    "SI")    (DF    "SI")
   (V2QI  "V2SI")  (V2HI  "V2SI")  (V2HF  "V2SI")  (V2SI  "V2SI")
   (V2SF  "V2SI")  (V2DI  "V2SI")  (V2DF  "V2SI")  (V2TI  "V2SI")
   (V4QI  "V4SI")  (V4HI  "V4SI")  (V4HF  "V4SI")  (V4SI  "V4SI")
   (V4SF  "V4SI")  (V4DI  "V4SI")  (V4DF  "V4SI")  (V4TI  "V4SI")
   (V8QI  "V8SI")  (V8HI  "V8SI")  (V8HF  "V8SI")  (V8SI  "V8SI")
   (V8SF  "V8SI")  (V8DI  "V8SI")  (V8DF  "V8SI")  (V8TI  "V8SI")
   (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
   (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
   (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
; Lower-case name of the DI-element vector that has the same lane count as
; the given vector mode.  Unlike vnsi, there are no entries for scalar modes.
(define_mode_attr vndi
  [(V2QI  "v2di")  (V2HI  "v2di")  (V2HF  "v2di")  (V2SI  "v2di")
   (V2SF  "v2di")  (V2DI  "v2di")  (V2DF  "v2di")  (V2TI  "v2di")
   (V4QI  "v4di")  (V4HI  "v4di")  (V4HF  "v4di")  (V4SI  "v4di")
   (V4SF  "v4di")  (V4DI  "v4di")  (V4DF  "v4di")  (V4TI  "v4di")
   (V8QI  "v8di")  (V8HI  "v8di")  (V8HF  "v8di")  (V8SI  "v8di")
   (V8SF  "v8di")  (V8DI  "v8di")  (V8DF  "v8di")  (V8TI  "v8di")
   (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
   (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
   (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
; Upper-case counterpart of vndi: the DI-element vector mode with the same
; lane count, usable as <VnDI>mode (e.g. for the match_scratch operands of
; the move patterns).  Vector modes only; no scalar entries.
(define_mode_attr VnDI
  [(V2QI  "V2DI")  (V2HI  "V2DI")  (V2HF  "V2DI")  (V2SI  "V2DI")
   (V2SF  "V2DI")  (V2DI  "V2DI")  (V2DF  "V2DI")  (V2TI  "V2DI")
   (V4QI  "V4DI")  (V4HI  "V4DI")  (V4HF  "V4DI")  (V4SI  "V4DI")
   (V4SF  "V4DI")  (V4DI  "V4DI")  (V4DF  "V4DI")  (V4TI  "V4DI")
   (V8QI  "V8DI")  (V8HI  "V8DI")  (V8HF  "V8DI")  (V8SI  "V8DI")
   (V8SF  "V8DI")  (V8DI  "V8DI")  (V8DF  "V8DI")  (V8TI  "V8DI")
   (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
   (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
   (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
; Per-mode selector string: "BYTE_0" for QI-element vectors, "WORD_0" for
; HI-element vectors and "DWORD" for SI-element vectors.  Only these three
; integer element types have entries.
; NOTE(review): presumably the SDWA operand-select keyword -- confirm
; against the patterns that use <sdwa>.
(define_mode_attr sdwa
  [(V2QI  "BYTE_0") (V2HI  "WORD_0") (V2SI  "DWORD")
   (V4QI  "BYTE_0") (V4HI  "WORD_0") (V4SI  "DWORD")
   (V8QI  "BYTE_0") (V8HI  "WORD_0") (V8SI  "DWORD")
   (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
   (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
   (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
301 (define_subst_attr "exec" "vec_merge"
303 (define_subst_attr "exec_clobber" "vec_merge_with_clobber"
305 (define_subst_attr "exec_vcc" "vec_merge_with_vcc"
307 (define_subst_attr "exec_scatter" "scatter_store"
310 (define_subst "vec_merge"
311 [(set (match_operand:V_MOV 0)
312 (match_operand:V_MOV 1))]
317 (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
318 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
320 (define_subst "vec_merge_with_clobber"
321 [(set (match_operand:V_MOV 0)
322 (match_operand:V_MOV 1))
323 (clobber (match_operand 2))]
328 (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
329 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
330 (clobber (match_dup 2))])
332 (define_subst "vec_merge_with_vcc"
333 [(set (match_operand:V_MOV 0)
334 (match_operand:V_MOV 1))
335 (set (match_operand:DI 2)
336 (match_operand:DI 3))]
342 (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
343 (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
345 (and:DI (match_dup 3)
346 (reg:DI EXEC_REG)))])])
348 (define_subst "scatter_store"
349 [(set (mem:BLK (scratch))
357 [(set (mem:BLK (scratch))
363 (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
369 ; This is the entry point for all vector register moves. Memory accesses can
370 ; come this way also, but will more usually use the reload_in/out,
371 ; gather/scatter, maskload/store, etc.
373 (define_expand "mov<mode>"
374 [(set (match_operand:V_MOV 0 "nonimmediate_operand")
375 (match_operand:V_MOV 1 "general_operand"))]
378 /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
379 registers, but we can convert the MEM to a mode that does work. */
380 if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
381 && SUBREG_P (operands[1])
382 && GET_MODE_SIZE (GET_MODE (operands[1]))
383 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
385 rtx src = SUBREG_REG (operands[1]);
386 rtx mem = copy_rtx (operands[0]);
387 PUT_MODE_RAW (mem, GET_MODE (src));
388 emit_move_insn (mem, src);
391 if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
392 && SUBREG_P (operands[0])
393 && GET_MODE_SIZE (GET_MODE (operands[0]))
394 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
396 rtx dest = SUBREG_REG (operands[0]);
397 rtx mem = copy_rtx (operands[1]);
398 PUT_MODE_RAW (mem, GET_MODE (dest));
399 emit_move_insn (dest, mem);
403 /* SUBREG of MEM is not supported. */
404 gcc_assert ((!SUBREG_P (operands[0])
405 || !MEM_P (SUBREG_REG (operands[0])))
406 && (!SUBREG_P (operands[1])
407 || !MEM_P (SUBREG_REG (operands[1]))));
409 if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
411 operands[1] = force_reg (<MODE>mode, operands[1]);
412 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
413 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
414 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
415 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
418 emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
421 else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
423 rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
424 rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
425 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
426 rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
429 emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
432 else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
434 gcc_assert (!reload_completed);
435 rtx scratch = gen_reg_rtx (<VnDI>mode);
436 emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
441 ; A pseudo instruction that helps LRA use the "U0" constraint.
443 (define_insn "mov<mode>_unspec"
444 [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
445 (match_operand:V_MOV 1 "gcn_unspec_operand" " U"))]
448 [(set_attr "type" "unknown")
449 (set_attr "length" "0")])
451 (define_insn "*mov<mode>"
452 [(set (match_operand:V_1REG 0 "nonimmediate_operand")
453 (match_operand:V_1REG 1 "general_operand"))]
455 {@ [cons: =0, 1; attrs: type, length, cdna]
456 [v ,vA;vop1 ,4,* ] v_mov_b32\t%0, %1
458 [v ,a ;vop3p_mai,8,* ] v_accvgpr_read_b32\t%0, %1
459 [$a ,v ;vop3p_mai,8,* ] v_accvgpr_write_b32\t%0, %1
460 [a ,a ;vop1 ,4,cdna2] v_accvgpr_mov_b32\t%0, %1
463 (define_insn "mov<mode>_exec"
464 [(set (match_operand:V_1REG 0 "nonimmediate_operand")
466 (match_operand:V_1REG 1 "general_operand")
467 (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand")
468 (match_operand:DI 3 "register_operand")))
469 (clobber (match_scratch:<VnDI> 4))]
470 "!MEM_P (operands[0]) || REG_P (operands[1])"
471 {@ [cons: =0, 1, 2, 3, =4; attrs: type, length]
472 [v,vA,U0,e ,X ;vop1 ,4 ] v_mov_b32\t%0, %1
473 [v,B ,U0,e ,X ;vop1 ,8 ] v_mov_b32\t%0, %1
474 [v,v ,vA,cV,X ;vop2 ,4 ] v_cndmask_b32\t%0, %2, %1, vcc
475 [v,vA,vA,Sv,X ;vop3a,8 ] v_cndmask_b32\t%0, %2, %1, %3
476 [v,m ,U0,e ,&v;* ,16] #
477 [m,v ,U0,e ,&v;* ,16] #
480 ; This variant does not accept an unspec, but does permit MEM
481 ; read/modify/write which is necessary for maskstore.
483 ;(define_insn "*mov<mode>_exec_match"
484 ; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
486 ; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
488 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
489 ; (clobber (match_scratch:<VnDI> 3 "=X,X,&v,&v"))]
490 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
496 ; [(set_attr "type" "vop1,vop1,*,*")
497 ; (set_attr "length" "4,8,16,16")])
499 (define_insn "*mov<mode>"
500 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a")
501 (match_operand:V_2REG 1 "general_operand" "vDB,a, v,a"))]
504 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
505 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
507 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
508 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
509 return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
511 return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\";
512 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
513 return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \
515 return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\";
516 * if (REGNO (operands[0]) <= REGNO (operands[1])) \
517 return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \
519 return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
520 [(set_attr "type" "vmult,vmult,vmult,vmult")
521 (set_attr "length" "16,16,16,8")
522 (set_attr "cdna" "*,*,*,cdna2")])
524 (define_insn "mov<mode>_exec"
525 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
527 (match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
528 (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
529 " U0,vDA0,vDA0,U0,U0")
530 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
531 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
532 "!MEM_P (operands[0]) || REG_P (operands[1])"
534 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
535 switch (which_alternative)
538 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
540 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
541 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
543 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
544 "v_cndmask_b32\t%H0, %H2, %H1, %3";
547 switch (which_alternative)
550 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
552 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
553 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
555 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
556 "v_cndmask_b32\t%L0, %L2, %L1, %3";
561 [(set_attr "type" "vmult,vmult,vmult,*,*")
562 (set_attr "length" "16,16,16,16,16")])
564 (define_insn "*mov<mode>_4reg"
565 [(set (match_operand:V_4REG 0 "nonimmediate_operand")
566 (match_operand:V_4REG 1 "general_operand"))]
568 {@ [cons: =0, 1; attrs: type, length, cdna]
569 [v ,vDB;vmult,16,* ] v_mov_b32\t%L0, %L1\; v_mov_b32\t%H0, %H1\; v_mov_b32\t%J0, %J1\; v_mov_b32\t%K0, %K1
570 [v ,a ;vmult,32,* ] v_accvgpr_read_b32\t%L0, %L1\; v_accvgpr_read_b32\t%H0, %H1\; v_accvgpr_read_b32\t%J0, %J1\; v_accvgpr_read_b32\t%K0, %K1
571 [$a,v ;vmult,32,* ] v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%J0, %J1\;v_accvgpr_write_b32\t%K0, %K1
572 [a ,a ;vmult,32,cdna2] v_accvgpr_mov_b32\t%L0, %L1\; v_accvgpr_mov_b32\t%H0, %H1\; v_accvgpr_mov_b32\t%J0, %J1\; v_accvgpr_mov_b32\t%K0, %K1
575 (define_insn "mov<mode>_exec"
576 [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, v, v, m")
578 (match_operand:V_4REG 1 "general_operand" "vDB, v0, v0, m, v")
579 (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
580 " U0,vDA0,vDA0,U0,U0")
581 (match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
582 (clobber (match_scratch:<VnDI> 4 "= X, X, X,&v,&v"))]
583 "!MEM_P (operands[0]) || REG_P (operands[1])"
585 if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
586 switch (which_alternative)
589 return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
590 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
592 return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
593 "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
594 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
595 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
597 return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
598 "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
599 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
600 "v_cndmask_b32\t%K0, %K2, %K1, %3";
603 switch (which_alternative)
606 return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\;"
607 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
609 return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
610 "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
611 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
612 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
614 return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
615 "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
616 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
617 "v_cndmask_b32\t%K0, %K2, %K1, %3";
622 [(set_attr "type" "vmult,vmult,vmult,*,*")
623 (set_attr "length" "32")])
625 ; This variant does not accept an unspec, but does permit MEM
626 ; read/modify/write which is necessary for maskstore.
628 ;(define_insn "*mov<mode>_exec_match"
629 ; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
631 ; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
633 ; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
634 ; (clobber (match_scratch:<VnDI> 3 "=X,&v,&v"))]
635 ; "!MEM_P (operands[0]) || REG_P (operands[1])"
637 ; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
638 ; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
640 ; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
643 ; [(set_attr "type" "vmult,*,*")
644 ; (set_attr "length" "16,16,16")])
646 ; A SGPR-base load looks like:
649 ; There's no hardware instruction that corresponds to this, but vector base
650 ; addresses are placed in an SGPR because it is easier to add to a vector.
651 ; We also have a temporary vT, and the vector v1 holding numbered lanes.
654 ; vT = v1 << log2(element-size)
658 (define_insn "@mov<mode>_sgprbase"
659 [(set (match_operand:V_1REG 0 "nonimmediate_operand")
661 [(match_operand:V_1REG 1 "general_operand")]
663 (clobber (match_operand:<VnDI> 2 "register_operand"))]
664 "lra_in_progress || reload_completed"
665 {@ [cons: =0, 1, =2; attrs: type, length, cdna]
666 [v,vA,&v;vop1,4 ,* ] v_mov_b32\t%0, %1
667 [v,vB,&v;vop1,8 ,* ] ^
670 [a,m ,&v;* ,12,cdna2] #
671 [m,a ,&v;* ,12,cdna2] #
674 (define_insn "@mov<mode>_sgprbase"
675 [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, a, m")
677 [(match_operand:V_2REG 1 "general_operand" "vDB, m, v, m, a")]
679 (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v"))]
680 "lra_in_progress || reload_completed"
682 * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
683 return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
685 return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
690 [(set_attr "type" "vmult,*,*,*,*")
691 (set_attr "length" "8,12,12,12,12")
692 (set_attr "cdna" "*,*,*,cdna2,cdna2")])
694 (define_insn "@mov<mode>_sgprbase"
695 [(set (match_operand:V_4REG 0 "nonimmediate_operand")
697 [(match_operand:V_4REG 1 "general_operand")]
699 (clobber (match_operand:<VnDI> 2 "register_operand"))]
700 "lra_in_progress || reload_completed"
701 {@ [cons: =0, 1, =2; attrs: type, length]
702 [v,vDB,&v;vmult,8 ] v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
707 ; Expand scalar addresses into gather/scatter patterns
710 [(set (match_operand:V_MOV 0 "memory_operand")
712 [(match_operand:V_MOV 1 "general_operand")]
714 (clobber (match_scratch:<VnDI> 2))]
716 [(set (mem:BLK (scratch))
717 (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
720 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
723 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
724 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
728 [(set (match_operand:V_MOV 0 "memory_operand")
730 (match_operand:V_MOV 1 "general_operand")
731 (match_operand:V_MOV 2 "")
732 (match_operand:DI 3 "gcn_exec_reg_operand")))
733 (clobber (match_scratch:<VnDI> 4))]
735 [(set (mem:BLK (scratch))
736 (unspec:BLK [(match_dup 5) (match_dup 1)
737 (match_dup 6) (match_dup 7) (match_dup 3)]
740 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
744 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
745 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
749 [(set (match_operand:V_MOV 0 "nonimmediate_operand")
751 [(match_operand:V_MOV 1 "memory_operand")]
753 (clobber (match_scratch:<VnDI> 2))]
756 (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
760 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
763 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
764 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
768 [(set (match_operand:V_MOV 0 "nonimmediate_operand")
770 (match_operand:V_MOV 1 "memory_operand")
771 (match_operand:V_MOV 2 "")
772 (match_operand:DI 3 "gcn_exec_reg_operand")))
773 (clobber (match_scratch:<VnDI> 4))]
777 (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
783 operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
787 operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
788 operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
791 ; TODO: Add zero/sign extending variants.
796 ; v_writelane and v_readlane work regardless of exec flags.
797 ; We allow source to be scratch.
799 ; FIXME these should take A immediates
801 (define_insn "*vec_set<mode>"
802 [(set (match_operand:V_1REG 0 "register_operand" "= v")
804 (vec_duplicate:V_1REG
805 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
806 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
807 (ashift (const_int 1)
808 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
810 "v_writelane_b32 %0, %1, %2"
811 [(set_attr "type" "vop3a")
812 (set_attr "length" "8")
813 (set_attr "exec" "none")
814 (set_attr "laneselect" "yes")])
816 ; FIXME: 64bit operations really should be splitters, but I am not sure how
817 ; to represent vertical subregs.
818 (define_insn "*vec_set<mode>"
819 [(set (match_operand:V_2REG 0 "register_operand" "= v")
821 (vec_duplicate:V_2REG
822 (match_operand:<SCALAR_MODE> 1 "register_operand" " Sv"))
823 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
824 (ashift (const_int 1)
825 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
827 "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
828 [(set_attr "type" "vmult")
829 (set_attr "length" "16")
830 (set_attr "exec" "none")
831 (set_attr "laneselect" "yes")])
833 (define_expand "vec_set<mode>"
834 [(set (match_operand:V_MOV 0 "register_operand")
837 (match_operand:<SCALAR_MODE> 1 "register_operand"))
839 (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
842 (define_insn "*vec_set<mode>_1"
843 [(set (match_operand:V_1REG 0 "register_operand" "=v")
845 (vec_duplicate:V_1REG
846 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
847 (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
848 (match_operand:SI 2 "const_int_operand" " i")))]
849 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
851 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
852 return "v_writelane_b32 %0, %1, %2";
854 [(set_attr "type" "vop3a")
855 (set_attr "length" "8")
856 (set_attr "exec" "none")
857 (set_attr "laneselect" "yes")])
859 (define_insn "*vec_set<mode>_1"
860 [(set (match_operand:V_2REG 0 "register_operand" "=v")
862 (vec_duplicate:V_2REG
863 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv"))
864 (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
865 (match_operand:SI 2 "const_int_operand" " i")))]
866 "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
868 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
869 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
871 [(set_attr "type" "vmult")
872 (set_attr "length" "16")
873 (set_attr "exec" "none")
874 (set_attr "laneselect" "yes")])
876 (define_insn "vec_duplicate<mode><exec>"
877 [(set (match_operand:V_1REG 0 "register_operand" "=v")
878 (vec_duplicate:V_1REG
879 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
882 [(set_attr "type" "vop3a")
883 (set_attr "length" "8")])
885 (define_insn "vec_duplicate<mode><exec>"
886 [(set (match_operand:V_2REG 0 "register_operand" "= v")
887 (vec_duplicate:V_2REG
888 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
890 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
891 [(set_attr "type" "vop3a")
892 (set_attr "length" "16")])
894 (define_insn "vec_duplicate<mode><exec>"
895 [(set (match_operand:V_4REG 0 "register_operand" "= v")
896 (vec_duplicate:V_4REG
897 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
899 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
900 [(set_attr "type" "mult")
901 (set_attr "length" "32")])
903 (define_insn "vec_extract<mode><scalar_mode>"
904 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg")
905 (vec_select:<SCALAR_MODE>
906 (match_operand:V_1REG 1 "register_operand" " v")
907 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
909 "v_readlane_b32 %0, %1, %2"
910 [(set_attr "type" "vop3a")
911 (set_attr "length" "8")
912 (set_attr "exec" "none")
913 (set_attr "laneselect" "yes")])
915 (define_insn "vec_extract<mode><scalar_mode>"
916 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
917 (vec_select:<SCALAR_MODE>
918 (match_operand:V_2REG 1 "register_operand" " v")
919 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
921 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
922 [(set_attr "type" "vmult")
923 (set_attr "length" "16")
924 (set_attr "exec" "none")
925 (set_attr "laneselect" "yes")])
927 (define_insn "vec_extract<mode><scalar_mode>"
928 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
929 (vec_select:<SCALAR_MODE>
930 (match_operand:V_4REG 1 "register_operand" " v")
931 (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
933 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
934 [(set_attr "type" "vmult")
935 (set_attr "length" "32")
936 (set_attr "exec" "none")
937 (set_attr "laneselect" "yes")])
939 (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
940 [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
941 (vec_select:V_1REG_ALT
942 (match_operand:V_1REG 1 "register_operand" " 0,v")
943 (match_operand 2 "ascending_zero_int_parallel" "")))]
944 "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode)
945 && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode
946 /* This comment silences a warning for operands[2]. */"
948 ; in-place extract %0
950 [(set_attr "type" "vmult")
951 (set_attr "length" "0,8")])
953 (define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop"
954 [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v")
955 (vec_select:V_2REG_ALT
956 (match_operand:V_2REG 1 "register_operand" " 0,v")
957 (match_operand 2 "ascending_zero_int_parallel" "")))]
958 "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode)
959 && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode
960 /* This comment silences a warning for operands[2]. */"
962 ; in-place extract %0
963 v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
964 [(set_attr "type" "vmult")
965 (set_attr "length" "0,8")])
967 (define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
968 [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
969 (vec_select:V_4REG_ALT
970 (match_operand:V_4REG 1 "register_operand" " 0,v")
971 (match_operand 2 "ascending_zero_int_parallel" "")))]
972 "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
973 && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
975 ; in-place extract %0
976 v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
977 [(set_attr "type" "vmult")
978 (set_attr "length" "0,16")])
; Expander: extract a narrower vector (operand 0) from a wider vector with the
; same scalar mode (operand 1).  Operand 2 is an immediate index counted in
; units of the narrower vector, so the first source lane is
; INTVAL (operands[2]) * numlanes.  Not available when TARGET_WAVE64_COMPAT is
; set and the source has more than 32 lanes.
; Two code sequences are generated: the _nop pattern with an ascending
; 0..numlanes-1 parallel (presumably when firstlane is 0), or a ds_bpermute
; driven by a vec_series starting at firstlane*4, followed by a move from a
; subreg of the permuted temporary.
980 (define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
981 [(match_operand:V_MOV_ALT 0 "register_operand")
982 (match_operand:V_MOV 1 "register_operand")
983 (match_operand 2 "immediate_operand")]
984 "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
985 && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode
986 && (!TARGET_WAVE64_COMPAT || MODE_VF (<V_MOV:MODE>mode) <= 32)"
988 int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
989 int firstlane = INTVAL (operands[2]) * numlanes;
994 rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
995 rtvec_alloc (numlanes));
996 for (int i = 0; i < numlanes; i++)
997 XVECEXP (parallel, 0, i) = GEN_INT (i);
998 emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
999 (operands[0], operands[1], parallel));
1001 /* FIXME: optimize this by using DPP where available. */
1003 rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
1004 emit_insn (gen_vec_series<V_MOV:vnsi> (permutation,
1005 GEN_INT (firstlane*4),
1008 tmp = gen_reg_rtx (<V_MOV:MODE>mode);
1009 emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1],
1010 get_exec (<V_MOV:MODE>mode)));
1012 emit_move_insn (operands[0],
1013 gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0));
; Expander: extract into scalar operand 0 the element of vector operand 2
; selected by the highest set bit of the DI mask in operand 1.  The lane
; index is computed as 63 - clz (mask) in a fresh SImode pseudo and passed to
; the scalar vec_extract expander.
; NOTE(review): a zero mask is not special-cased in this expander.
1018 (define_expand "extract_last_<mode>"
1019 [(match_operand:<SCALAR_MODE> 0 "register_operand")
1020 (match_operand:DI 1 "gcn_alu_operand")
1021 (match_operand:V_MOV 2 "register_operand")]
1022 "can_create_pseudo_p ()"
1024 rtx dst = operands[0];
1025 rtx mask = operands[1];
1026 rtx vect = operands[2];
1027 rtx tmpreg = gen_reg_rtx (SImode);
1029 emit_insn (gen_clzdi2 (tmpreg, mask));
1030 emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
1031 emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
; Expander: like extract_last_<mode>, but with a fallback value.
;   operand 0: scalar destination
;   operand 1: default value, copied to operand 0 when the mask is zero
;   operand 2: DI mask
;   operand 3: source vector
; A compare-and-branch on mask == 0 jumps to else_label (the default-value
; move); otherwise extract_last_<mode> is emitted and control jumps to
; end_label.
1035 (define_expand "fold_extract_last_<mode>"
1036 [(match_operand:<SCALAR_MODE> 0 "register_operand")
1037 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
1038 (match_operand:DI 2 "gcn_alu_operand")
1039 (match_operand:V_MOV 3 "register_operand")]
1040 "can_create_pseudo_p ()"
1042 rtx dst = operands[0];
1043 rtx default_value = operands[1];
1044 rtx mask = operands[2];
1045 rtx vect = operands[3];
1046 rtx else_label = gen_label_rtx ();
1047 rtx end_label = gen_label_rtx ();
1049 rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
1050 emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
1051 emit_insn (gen_extract_last_<mode> (dst, mask, vect));
1052 emit_jump_insn (gen_jump (end_label));
1054 emit_label (else_label);
1055 emit_move_insn (dst, default_value);
1056 emit_label (end_label);
; Expander: initialise vector operand 0 from operand 1; all of the work is
; delegated to gcn_expand_vector_init.
1060 (define_expand "vec_init<mode><scalar_mode>"
1061 [(match_operand:V_MOV 0 "register_operand")
1065 gcn_expand_vector_init (operands[0], operands[1]);
; Expander: initialise vector operand 0 from operand 1, whose mode is a
; narrower vector with the same scalar mode (fewer lanes than operand 0).
; Delegates to gcn_expand_vector_init.
1069 (define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
1070 [(match_operand:V_MOV 0 "register_operand")
1071 (match_operand:V_MOV_ALT 1)]
1072 "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
1073 && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
1075 gcn_expand_vector_init (operands[0], operands[1]);
1080 ;; {{{ Scatter / Gather
1082 ;; GCN does not have an instruction for loading a vector from contiguous
1083 ;; memory so *all* loads and stores are eventually converted to scatter
1086 ;; GCC does not permit MEM to hold vectors of addresses, so we must use an
1087 ;; unspec. The unspec formats are as follows:
1090 ;; [(<address expression>)
1093 ;; (mem:BLK (scratch))]
1097 ;; [(<address expression>)
1098 ;; (<source register>)
1104 ;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
1105 ;; - The mem:BLK does not contain any real information, but indicates that an
1106 ;; unknown memory read is taking place. Stores are expected to use a similar
1107 ;; mem:BLK outside the unspec.
1108 ;; - The address space and glc (volatile) fields are there to replace the
1109 ;; fields normally found in a MEM.
1110 ;; - Multiple forms of address expression are supported, below.
1112 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
; Expander for gather loads into vector operand 0.
;   operand 1: DI register (presumably the scalar base address)
;   operand 2: <VnSI> register (presumably the per-lane offsets)
;   operand 3: immediate, passed as INTVAL to gcn_expand_scaled_offsets
;   operand 4: SI operand, also passed to gcn_expand_scaled_offsets
; gcn_expand_scaled_offsets is called for DEFAULT_ADDR_SPACE.  If it returns
; a <VnDI> value the _1offset insn is used with that as the address vector;
; otherwise the _2offsets insn is used with operand 1 and the returned value.
; The constant offset and the remaining immediate operands are passed as
; zero.
1114 (define_expand "gather_load<mode><vnsi>"
1115 [(match_operand:V_MOV 0 "register_operand")
1116 (match_operand:DI 1 "register_operand")
1117 (match_operand:<VnSI> 2 "register_operand")
1118 (match_operand 3 "immediate_operand")
1119 (match_operand:SI 4 "gcn_alu_operand")]
1122 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
1123 operands[2], operands[4],
1124 INTVAL (operands[3]), NULL);
1126 if (GET_MODE (addr) == <VnDI>mode)
1127 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
1128 const0_rtx, const0_rtx));
1130 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
1131 addr, const0_rtx, const0_rtx,
; Generic gather expander: operand 1 carries no predicate, so any address
; expression is accepted; operands 2 and 3 are immediates and the
; (mem:BLK (scratch)) element follows them.
1136 ; Allow any address expression
1137 (define_expand "gather<mode>_expr<exec>"
1138 [(set (match_operand:V_MOV 0 "register_operand")
1140 [(match_operand 1 "")
1141 (match_operand 2 "immediate_operand")
1142 (match_operand 3 "immediate_operand")
1143 (mem:BLK (scratch))]
; Gather load from a vector of 64-bit addresses (operand 1) plus a constant
; offset (operand 2).  Operand 3 is the address space and a non-zero
; operand 4 appends " glc" to the instruction.
; Accepted when the address space is flat and the offset, viewed as unsigned,
; is below 0x1000, or when it is global and the offset lies in
; [-0x1000, 0x1000).  The output is flat_load or global_load followed by an
; s_waitcnt.  The "xnack on" alternatives use earlyclobber destinations.
1148 (define_insn "gather<mode>_insn_1offset<exec>"
1149 [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
1151 [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v,v, v, v")
1152 (vec_duplicate:<VnDI>
1153 (match_operand 2 "immediate_operand" " n,n, n, n")))
1154 (match_operand 3 "immediate_operand" " n,n, n, n")
1155 (match_operand 4 "immediate_operand" " n,n, n, n")
1156 (mem:BLK (scratch))]
1158 "(AS_FLAT_P (INTVAL (operands[3]))
1159 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))
1160 || (AS_GLOBAL_P (INTVAL (operands[3]))
1161 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
1163 addr_space_t as = INTVAL (operands[3]);
1164 const char *glc = INTVAL (operands[4]) ? " glc" : "";
1166 static char buf[200];
1168 sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0", glc);
1169 else if (AS_GLOBAL_P (as))
1170 sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
1171 "s_waitcnt\tvmcnt(0)", glc);
1177 [(set_attr "type" "flat")
1178 (set_attr "length" "12")
1179 (set_attr "cdna" "*,cdna2,*,cdna2")
1180 (set_attr "xnack" "off,off,on,on")])
; Gather load through the DS address spaces: a vector of 32-bit addresses
; (operand 1) plus a constant offset (operand 2) that must be below 0x10000
; when viewed as unsigned.  Operand 3 is the address space; " gds" is
; appended when AS_GDS_P holds for it.  The output is ds_read followed by
; s_waitcnt lgkmcnt(0).
1182 (define_insn "gather<mode>_insn_1offset_ds<exec>"
1183 [(set (match_operand:V_MOV 0 "register_operand" "=v,a")
1185 [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v,v")
1186 (vec_duplicate:<VnSI>
1187 (match_operand 2 "immediate_operand" " n,n")))
1188 (match_operand 3 "immediate_operand" " n,n")
1189 (match_operand 4 "immediate_operand" " n,n")
1190 (mem:BLK (scratch))]
1192 "(AS_ANY_DS_P (INTVAL (operands[3]))
1193 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
1195 addr_space_t as = INTVAL (operands[3]);
1196 static char buf[200];
1197 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
1198 (AS_GDS_P (as) ? " gds" : ""));
1201 [(set_attr "type" "ds")
1202 (set_attr "length" "12")
1203 (set_attr "cdna" "*,cdna2")])
; Gather load whose address is built from a scalar DI base (operand 1,
; duplicated across lanes), a <VnSI> per-lane offset (operand 2) and a
; constant offset (operand 3).  Operand 4 is the address space and a non-zero
; operand 5 appends " glc".  Only the global address space is accepted, with
; the constant offset in [-0x1000, 0x1000).  The output is
; "global_load %0, %2, %1 offset:%3" followed by s_waitcnt vmcnt(0).
1205 (define_insn "gather<mode>_insn_2offsets<exec>"
1206 [(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
1210 (vec_duplicate:<VnDI>
1211 (match_operand:DI 1 "register_operand" "Sv,Sv,Sv,Sv"))
1213 (match_operand:<VnSI> 2 "register_operand" " v, v, v, v")))
1214 (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
1216 (match_operand 4 "immediate_operand" " n, n, n, n")
1217 (match_operand 5 "immediate_operand" " n, n, n, n")
1218 (mem:BLK (scratch))]
1220 "(AS_GLOBAL_P (INTVAL (operands[4]))
1221 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
1223 addr_space_t as = INTVAL (operands[4]);
1224 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1226 static char buf[200];
1227 if (AS_GLOBAL_P (as))
1228 sprintf (buf, "global_load%%o0\t%%0, %%2, %%1 offset:%%3%s\;"
1229 "s_waitcnt\tvmcnt(0)", glc);
1235 [(set_attr "type" "flat")
1236 (set_attr "length" "12")
1237 (set_attr "cdna" "*,cdna2,*,cdna2")
1238 (set_attr "xnack" "off,off,on,on")])
; Expander for scatter stores of vector operand 4.
;   operand 0: DI register (presumably the scalar base address)
;   operand 1: <VnSI> register (presumably the per-lane offsets)
;   operand 2: immediate, passed as INTVAL to gcn_expand_scaled_offsets
;   operand 3: SI operand, also passed to gcn_expand_scaled_offsets
; Mirrors gather_load: a <VnDI> result from gcn_expand_scaled_offsets selects
; the _1offset insn, anything else the _2offsets insn with operand 0 as the
; base.  The constant offset and the remaining immediates are zero.
1240 (define_expand "scatter_store<mode><vnsi>"
1241 [(match_operand:DI 0 "register_operand")
1242 (match_operand:<VnSI> 1 "register_operand")
1243 (match_operand 2 "immediate_operand")
1244 (match_operand:SI 3 "gcn_alu_operand")
1245 (match_operand:V_MOV 4 "register_operand")]
1248 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
1249 operands[1], operands[3],
1250 INTVAL (operands[2]), NULL);
1252 if (GET_MODE (addr) == <VnDI>mode)
1253 emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
1254 const0_rtx, const0_rtx));
1256 emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
1257 const0_rtx, operands[4],
1258 const0_rtx, const0_rtx));
; Generic scatter expander: operand 0 is a <VnDI> value with no predicate, so
; any address expression is accepted; operand 1 is the vector to store and
; operands 2 and 3 are immediates.
1262 ; Allow any address expression
1263 (define_expand "scatter<mode>_expr<exec_scatter>"
1264 [(set (mem:BLK (scratch))
1266 [(match_operand:<VnDI> 0 "")
1267 (match_operand:V_MOV 1 "register_operand")
1268 (match_operand 2 "immediate_operand")
1269 (match_operand 3 "immediate_operand")]
; Scatter store of vector operand 2 to a vector of 64-bit addresses
; (operand 0) plus a constant offset (operand 1).  Operand 3 is the address
; space and a non-zero operand 4 appends " glc".
; Accepted for the flat address space with an unsigned offset below 0x1000,
; or for the global address space with the offset in [-0x1000, 0x1000).
; NOTE(review): the explicit "== 0" test in the flat case is already covered
; by the unsigned "< 0x1000" comparison that follows it.
1274 (define_insn "scatter<mode>_insn_1offset<exec_scatter>"
1275 [(set (mem:BLK (scratch))
1277 [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v,v")
1278 (vec_duplicate:<VnDI>
1279 (match_operand 1 "immediate_operand" "n,n")))
1280 (match_operand:V_MOV 2 "register_operand" "v,a")
1281 (match_operand 3 "immediate_operand" "n,n")
1282 (match_operand 4 "immediate_operand" "n,n")]
1284 "(AS_FLAT_P (INTVAL (operands[3]))
1285 && (INTVAL(operands[1]) == 0
1286 || ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
1287 || (AS_GLOBAL_P (INTVAL (operands[3]))
1288 && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
1290 addr_space_t as = INTVAL (operands[3]);
1291 const char *glc = INTVAL (operands[4]) ? " glc" : "";
1293 static char buf[200];
1295 sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
1296 else if (AS_GLOBAL_P (as))
1297 sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
1303 [(set_attr "type" "flat")
1304 (set_attr "length" "12")
1305 (set_attr "cdna" "*,cdna2")])
; Scatter store of vector operand 2 through the DS address spaces: a vector
; of 32-bit addresses (operand 0) plus a constant offset (operand 1) that
; must be below 0x10000 when viewed as unsigned.  Operand 3 is the address
; space; " gds" is appended when AS_GDS_P holds for it.  The output is
; ds_write followed by s_waitcnt lgkmcnt(0).
1307 (define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
1308 [(set (mem:BLK (scratch))
1310 [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v,v")
1311 (vec_duplicate:<VnSI>
1312 (match_operand 1 "immediate_operand" "n,n")))
1313 (match_operand:V_MOV 2 "register_operand" "v,a")
1314 (match_operand 3 "immediate_operand" "n,n")
1315 (match_operand 4 "immediate_operand" "n,n")]
1317 "(AS_ANY_DS_P (INTVAL (operands[3]))
1318 && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
1320 addr_space_t as = INTVAL (operands[3]);
1321 static char buf[200];
1322 sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
1323 (AS_GDS_P (as) ? " gds" : ""));
1326 [(set_attr "type" "ds")
1327 (set_attr "length" "12")
1328 (set_attr "cdna" "*,cdna2")])
; Scatter store of vector operand 3 to an address built from a scalar DI base
; (operand 0, duplicated across lanes), a <VnSI> per-lane offset (operand 1)
; and a constant offset (operand 2).  Operand 4 is the address space and a
; non-zero operand 5 appends " glc".  Only the global address space is
; accepted, with the constant offset in [-0x1000, 0x1000).  The output is
; "global_store %1, %3, %0 offset:%2".
1330 (define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
1331 [(set (mem:BLK (scratch))
1335 (vec_duplicate:<VnDI>
1336 (match_operand:DI 0 "register_operand" "Sv,Sv"))
1338 (match_operand:<VnSI> 1 "register_operand" "v,v")))
1339 (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" "n,n")))
1340 (match_operand:V_MOV 3 "register_operand" "v,a")
1341 (match_operand 4 "immediate_operand" "n,n")
1342 (match_operand 5 "immediate_operand" "n,n")]
1344 "(AS_GLOBAL_P (INTVAL (operands[4]))
1345 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
1347 addr_space_t as = INTVAL (operands[4]);
1348 const char *glc = INTVAL (operands[5]) ? " glc" : "";
1350 static char buf[200];
1351 if (AS_GLOBAL_P (as))
1352 sprintf (buf, "global_store%%s3\t%%1, %%3, %%0 offset:%%2%s", glc);
1358 [(set_attr "type" "flat")
1359 (set_attr "length" "12")
1360 (set_attr "cdna" "*,cdna2")])
; Lane permutation of a single-register vector using ds_bpermute_b32.
; Note the operand numbering: operand 2 is the data vector and operand 1 is
; the <VnSI> selector, matching the assembler order "%0, %1, %2".  Operand 3
; is the exec register.  The instruction is followed by s_waitcnt
; lgkmcnt(0).
1365 (define_insn "ds_bpermute<mode>"
1366 [(set (match_operand:V_1REG 0 "register_operand" "=v")
1368 [(match_operand:V_1REG 2 "register_operand" " v")
1369 (match_operand:<VnSI> 1 "register_operand" " v")
1370 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1373 "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
1374 [(set_attr "type" "vop2")
1375 (set_attr "length" "12")])
; Lane permutation of a two-register vector.  It is split into two <VnSI>
; permutes that share the selector (operand 1) and exec (operand 3): parts 0
; and 1 of the destination (operands 4 and 5) are produced from parts 0 and 1
; of the source (operands 6 and 7), obtained with gcn_operand_part.  The
; destination is earlyclobber unless it is tied to the source ("v0").
1377 (define_insn_and_split "ds_bpermute<mode>"
1378 [(set (match_operand:V_2REG 0 "register_operand" "=&v")
1380 [(match_operand:V_2REG 2 "register_operand" " v0")
1381 (match_operand:<VnSI> 1 "register_operand" " v")
1382 (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
1387 [(set (match_dup 4) (unspec:<VnSI>
1388 [(match_dup 6) (match_dup 1) (match_dup 3)]
1390 (set (match_dup 5) (unspec:<VnSI>
1391 [(match_dup 7) (match_dup 1) (match_dup 3)]
1394 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
1395 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
1396 operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
1397 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
1399 [(set_attr "type" "vmult")
1400 (set_attr "length" "24")])
; DPP move of vector operand 1 into operand 0.  Operand 2 is a constant
; integer whose value is passed to gcn_expand_dpp_shr_insn, which builds the
; assembler text around "v_mov_b32".
1402 (define_insn "@dpp_move<mode>"
1403 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1405 [(match_operand:V_noHI 1 "register_operand" " v")
1406 (match_operand:SI 2 "const_int_operand" " n")]
1407 UNSPEC_MOV_DPP_SHR))]
1410 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
1411 UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
1413 [(set_attr "type" "vop_dpp")
1414 (set_attr "length" "16")])
; DPP move described by UNSPEC_MOV_DPP_SWAP_PAIRS; the assembler text is
; produced by gcn_expand_dpp_swap_pairs_insn around "v_mov_b32".
1416 (define_insn "@dpp_swap_pairs<mode>"
1417 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1419 [(match_operand:V_noHI 1 "register_operand" " v")]
1420 UNSPEC_MOV_DPP_SWAP_PAIRS))]
1423 return gcn_expand_dpp_swap_pairs_insn (<MODE>mode, "v_mov_b32",
1424 UNSPEC_MOV_DPP_SWAP_PAIRS);
1426 [(set_attr "type" "vop_dpp")
1427 (set_attr "length" "16")])
; DPP move described by UNSPEC_MOV_DPP_DISTRIBUTE_EVEN; the assembler text is
; produced by gcn_expand_dpp_distribute_even_insn around "v_mov_b32".
1429 (define_insn "@dpp_distribute_even<mode>"
1430 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1432 [(match_operand:V_noHI 1 "register_operand" " v")]
1433 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN))]
1436 return gcn_expand_dpp_distribute_even_insn (<MODE>mode, "v_mov_b32",
1437 UNSPEC_MOV_DPP_DISTRIBUTE_EVEN);
1439 [(set_attr "type" "vop_dpp")
1440 (set_attr "length" "16")])
; DPP move described by UNSPEC_MOV_DPP_DISTRIBUTE_ODD; the assembler text is
; produced by gcn_expand_dpp_distribute_odd_insn around "v_mov_b32".
; The RTL template must use the _ODD unspec: with the _EVEN unspec this
; pattern is indistinguishable from @dpp_distribute_even<mode>, so insns
; generated here would be recognised as the even variant.
1442 (define_insn "@dpp_distribute_odd<mode>"
1443 [(set (match_operand:V_noHI 0 "register_operand" "=v")
1445 [(match_operand:V_noHI 1 "register_operand" " v")]
1446 UNSPEC_MOV_DPP_DISTRIBUTE_ODD))]
1449 return gcn_expand_dpp_distribute_odd_insn (<MODE>mode, "v_mov_b32",
1450 UNSPEC_MOV_DPP_DISTRIBUTE_ODD);
1452 [(set_attr "type" "vop_dpp")
1453 (set_attr "length" "16")])
1456 ;; {{{ ALU special case: add/sub
; Integer add for single-register vector modes.  Operand 1 is marked
; commutative ("%") and operand 2 may be any gcn_alu_operand.  The
; instruction is v_add_co_u32 with vcc as the carry destination, hence the
; VCC clobber.
1458 (define_insn "add<mode>3<exec_clobber>"
1459 [(set (match_operand:V_INT_1REG 0 "register_operand")
1461 (match_operand:V_INT_1REG 1 "register_operand")
1462 (match_operand:V_INT_1REG 2 "gcn_alu_operand")))
1463 (clobber (reg:DI VCC_REG))]
1465 {@ [cons: =0, %1, 2; attrs: type, length]
1466 [v,v,vSvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
; Integer add of a scalar (operand 2, duplicated across all lanes) to vector
; operand 1.  The instruction is v_add_co_u32 with vcc as the carry
; destination, hence the VCC clobber.
1470 (define_insn "add<mode>3_dup<exec_clobber>"
1471 [(set (match_operand:V_INT_1REG 0 "register_operand")
1473 (vec_duplicate:V_INT_1REG
1474 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"))
1475 (match_operand:V_INT_1REG 1 "register_operand")))
1476 (clobber (reg:DI VCC_REG))]
1478 {@ [cons: =0, 1, 2; attrs: type, length]
1479 [v,v,SvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
; 32-bit vector add that also produces a carry: operand 3 (a DI register) is
; set from an unsigned "less than" comparison on the sum.  The alternatives
; differ in where the carry goes (cV or Sg) and in the constant class
; accepted for operand 2 (A or B), which changes the type and length
; attributes.
1483 (define_insn "add<mode>3_vcc<exec_vcc>"
1484 [(set (match_operand:V_SI 0 "register_operand")
1486 (match_operand:V_SI 1 "register_operand")
1487 (match_operand:V_SI 2 "gcn_alu_operand")))
1488 (set (match_operand:DI 3 "register_operand")
1489 (ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
1492 {@ [cons: =0, %1, 2, =3; attrs: type, length]
1493 [v,v,vSvA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %2, %1
1494 [v,v,vSvB,cV;vop2 ,8] ^
1495 [v,v,vSvA,Sg;vop3b,8] ^
; 32-bit vector add of a duplicated SI scalar (operand 1) to vector
; operand 2, with the carry written to DI operand 3.
; NOTE(review): the carry expression below duplicates (match_dup 2), which is
; the vector operand, not the scalar operand 1 - confirm that is intended.
1498 ; This pattern only changes the VCC bits when the corresponding lane is
1499 ; enabled, so the set must be described as an ior.
1501 (define_insn "add<mode>3_vcc_dup<exec_vcc>"
1502 [(set (match_operand:V_SI 0 "register_operand")
1505 (match_operand:SI 1 "gcn_alu_operand"))
1506 (match_operand:V_SI 2 "register_operand")))
1507 (set (match_operand:DI 3 "register_operand")
1508 (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
1510 (vec_duplicate:V_SI (match_dup 2))))]
1512 {@ [cons: =0, 1, 2, =3; attrs: type, length]
1513 [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
1514 [v,SvB,v,cV;vop2 ,8] ^
1515 [v,SvA,v,Sg;vop3b,8] ^
; 32-bit vector add with carry-in and carry-out.  The carry-in (DI operand 3)
; is expanded to a per-lane 1/0 vector with vec_merge and added to operands 1
; and 2; the carry-out goes to DI operand 4 and is described as an ior of
; comparisons.  The assembler mnemonic has two dialect spellings:
; v_addc_co_u32 and v_add_co_ci_u32.
1518 ; v_addc does not accept an SGPR because the VCC read already counts as an
1519 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1520 ; accept "B" immediate constants due to a related bus conflict.
1522 (define_insn "addc<mode>3<exec_vcc>"
1523 [(set (match_operand:V_SI 0 "register_operand" "=v, v")
1527 (vec_duplicate:V_SI (const_int 1))
1528 (vec_duplicate:V_SI (const_int 0))
1529 (match_operand:DI 3 "register_operand" " cV,cVSv"))
1530 (match_operand:V_SI 1 "gcn_alu_operand" "% v, vA"))
1531 (match_operand:V_SI 2 "gcn_alu_operand" " vA, vA")))
1532 (set (match_operand:DI 4 "register_operand" "=cV,cVSg")
1533 (ior:DI (ltu:DI (plus:V_SI
1536 (vec_duplicate:V_SI (const_int 1))
1537 (vec_duplicate:V_SI (const_int 0))
1544 (vec_duplicate:V_SI (const_int 1))
1545 (vec_duplicate:V_SI (const_int 0))
1550 "{v_addc_co_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
1551 [(set_attr "type" "vop2,vop3b")
1552 (set_attr "length" "4,8")])
; Integer subtract for single-register vector modes.  Subtraction is not
; commutative, so there are two alternatives: v_sub_co_u32 when operand 2 is
; a VGPR, and v_subrev_co_u32 (operands swapped in the assembler) when
; operand 1 is a VGPR.  The carry goes to vcc, hence the VCC clobber.
1554 (define_insn "sub<mode>3<exec_clobber>"
1555 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
1557 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
1558 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
1559 (clobber (reg:DI VCC_REG))]
1562 v_sub_co_u32\t%0, vcc, %1, %2
1563 v_subrev_co_u32\t%0, vcc, %2, %1"
1564 [(set_attr "type" "vop2")
1565 (set_attr "length" "8,8")])
; 32-bit vector subtract that also produces a borrow in DI operand 3,
; described with an unsigned "greater than" comparison on the difference.
; Four alternatives: sub or subrev form, each with the borrow in cV (vop2) or
; in Sg (vop3b).
1567 (define_insn "sub<mode>3_vcc<exec_vcc>"
1568 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1570 (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
1571 (match_operand:V_SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
1572 (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
1573 (gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
1577 v_sub_co_u32\t%0, %3, %1, %2
1578 v_sub_co_u32\t%0, %3, %1, %2
1579 v_subrev_co_u32\t%0, %3, %2, %1
1580 v_subrev_co_u32\t%0, %3, %2, %1"
1581 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1582 (set_attr "length" "8")])
; 32-bit vector subtract with borrow-in (DI operand 3) and borrow-out (DI
; operand 4).  The borrow-in is expanded to a per-lane 1/0 vector with
; vec_merge.  Four alternatives: subb or subbrev form, each as vop2 or vop3b.
; Each mnemonic has two dialect spellings.
1584 ; v_subb does not accept an SGPR because the VCC read already counts as an
1585 ; SGPR use and the number of SGPR operands is limited to 1. It does not
1586 ; accept "B" immediate constants due to a related bus conflict.
1588 (define_insn "subc<mode>3<exec_vcc>"
1589 [(set (match_operand:V_SI 0 "register_operand" "= v, v, v, v")
1593 (vec_duplicate:V_SI (const_int 1))
1594 (vec_duplicate:V_SI (const_int 0))
1595 (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
1596 (match_operand:V_SI 1 "gcn_alu_operand" " vA, vA, v, vA"))
1597 (match_operand:V_SI 2 "gcn_alu_operand" " v, vA,vA, vA")))
1598 (set (match_operand:DI 4 "register_operand" "=cV,cVSg,cV,cVSg")
1599 (ior:DI (gtu:DI (minus:V_SI (minus:V_SI
1601 (vec_duplicate:V_SI (const_int 1))
1602 (vec_duplicate:V_SI (const_int 0))
1607 (ltu:DI (minus:V_SI (vec_merge:V_SI
1608 (vec_duplicate:V_SI (const_int 1))
1609 (vec_duplicate:V_SI (const_int 0))
1615 {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
1616 {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
1617 {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
1618 {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
1619 [(set_attr "type" "vop2,vop3b,vop2,vop3b")
1620 (set_attr "length" "4,8,4,8")])
; 64-bit vector add.  Once operands 0-2 all satisfy gcn_can_split_p the insn
; is split into add<vnsi>3_vcc on the low parts (part 0) followed by
; addc<vnsi>3 on the high parts (part 1), with the carry passed through the
; VCC register.
1622 (define_insn_and_split "add<mode>3"
1623 [(set (match_operand:V_DI 0 "register_operand" "= v")
1625 (match_operand:V_DI 1 "register_operand" "%vDb")
1626 (match_operand:V_DI 2 "gcn_alu_operand" " vDb")))
1627 (clobber (reg:DI VCC_REG))]
1630 "gcn_can_split_p (<MODE>mode, operands[0])
1631 && gcn_can_split_p (<MODE>mode, operands[1])
1632 && gcn_can_split_p (<MODE>mode, operands[2])"
1635 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1636 emit_insn (gen_add<vnsi>3_vcc
1637 (gcn_operand_part (<MODE>mode, operands[0], 0),
1638 gcn_operand_part (<MODE>mode, operands[1], 0),
1639 gcn_operand_part (<MODE>mode, operands[2], 0),
1641 emit_insn (gen_addc<vnsi>3
1642 (gcn_operand_part (<MODE>mode, operands[0], 1),
1643 gcn_operand_part (<MODE>mode, operands[1], 1),
1644 gcn_operand_part (<MODE>mode, operands[2], 1),
1648 [(set_attr "type" "vmult")
1649 (set_attr "length" "8")])
; 64-bit vector add under an exec mask.  Operand 3 is the merge value for
; inactive lanes (register or unspec, tied to operand 0) and operand 4 is the
; exec register.  The insn is split into add<vnsi>3_vcc_exec on the low parts
; and addc<vnsi>3_exec on the high parts, with the carry passed through VCC.
; The split condition checks every V_DI operand that is taken apart with
; gcn_operand_part (operands 0-3).  Operand 4 is the DI exec mask and is
; never split, so it is not tested; this matches the other _exec splitters.
1651 (define_insn_and_split "add<mode>3_exec"
1652 [(set (match_operand:V_DI 0 "register_operand" "= v")
1655 (match_operand:V_DI 1 "register_operand" "%vDb")
1656 (match_operand:V_DI 2 "gcn_alu_operand" " vDb"))
1657 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
1658 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
1659 (clobber (reg:DI VCC_REG))]
1662 "gcn_can_split_p (<MODE>mode, operands[0])
1663 && gcn_can_split_p (<MODE>mode, operands[1])
1664 && gcn_can_split_p (<MODE>mode, operands[2])
1665 && gcn_can_split_p (<MODE>mode, operands[3])"
1668 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1669 emit_insn (gen_add<vnsi>3_vcc_exec
1670 (gcn_operand_part (<MODE>mode, operands[0], 0),
1671 gcn_operand_part (<MODE>mode, operands[1], 0),
1672 gcn_operand_part (<MODE>mode, operands[2], 0),
1674 gcn_operand_part (<MODE>mode, operands[3], 0),
1676 emit_insn (gen_addc<vnsi>3_exec
1677 (gcn_operand_part (<MODE>mode, operands[0], 1),
1678 gcn_operand_part (<MODE>mode, operands[1], 1),
1679 gcn_operand_part (<MODE>mode, operands[2], 1),
1681 gcn_operand_part (<MODE>mode, operands[3], 1),
1685 [(set_attr "type" "vmult")
1686 (set_attr "length" "8")])
; 64-bit vector subtract.  Two alternatives allow either input to be a
; non-VGPR operand.  Once operands 0-2 satisfy gcn_can_split_p the insn is
; split into sub<vnsi>3_vcc on the low parts followed by subc<vnsi>3 on the
; high parts, with the borrow passed through VCC.
1688 (define_insn_and_split "sub<mode>3"
1689 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1691 (match_operand:V_DI 1 "gcn_alu_operand" "vDb, v")
1692 (match_operand:V_DI 2 "gcn_alu_operand" " v,vDb")))
1693 (clobber (reg:DI VCC_REG))]
1696 "gcn_can_split_p (<MODE>mode, operands[0])
1697 && gcn_can_split_p (<MODE>mode, operands[1])
1698 && gcn_can_split_p (<MODE>mode, operands[2])"
1701 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1702 emit_insn (gen_sub<vnsi>3_vcc
1703 (gcn_operand_part (<MODE>mode, operands[0], 0),
1704 gcn_operand_part (<MODE>mode, operands[1], 0),
1705 gcn_operand_part (<MODE>mode, operands[2], 0),
1707 emit_insn (gen_subc<vnsi>3
1708 (gcn_operand_part (<MODE>mode, operands[0], 1),
1709 gcn_operand_part (<MODE>mode, operands[1], 1),
1710 gcn_operand_part (<MODE>mode, operands[2], 1),
1714 [(set_attr "type" "vmult")
1715 (set_attr "length" "8")])
; 64-bit vector subtract under an exec mask (operand 4), with operand 3 as
; the merge value for inactive lanes.  The insn condition requires at least
; one of the two inputs to be a register.  It is split into
; sub<vnsi>3_vcc_exec on the low parts and subc<vnsi>3_exec on the high
; parts once operands 0-3 satisfy gcn_can_split_p.
1717 (define_insn_and_split "sub<mode>3_exec"
1718 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1721 (match_operand:V_DI 1 "gcn_alu_operand" "vSvB, v")
1722 (match_operand:V_DI 2 "gcn_alu_operand" " v,vSvB"))
1723 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1724 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1725 (clobber (reg:DI VCC_REG))]
1726 "register_operand (operands[1], VOIDmode)
1727 || register_operand (operands[2], VOIDmode)"
1729 "gcn_can_split_p (<MODE>mode, operands[0])
1730 && gcn_can_split_p (<MODE>mode, operands[1])
1731 && gcn_can_split_p (<MODE>mode, operands[2])
1732 && gcn_can_split_p (<MODE>mode, operands[3])"
1735 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1736 emit_insn (gen_sub<vnsi>3_vcc_exec
1737 (gcn_operand_part (<MODE>mode, operands[0], 0),
1738 gcn_operand_part (<MODE>mode, operands[1], 0),
1739 gcn_operand_part (<MODE>mode, operands[2], 0),
1741 gcn_operand_part (<MODE>mode, operands[3], 0),
1743 emit_insn (gen_subc<vnsi>3_exec
1744 (gcn_operand_part (<MODE>mode, operands[0], 1),
1745 gcn_operand_part (<MODE>mode, operands[1], 1),
1746 gcn_operand_part (<MODE>mode, operands[2], 1),
1748 gcn_operand_part (<MODE>mode, operands[3], 1),
1752 [(set_attr "type" "vmult")
1753 (set_attr "length" "8")])
; Add a 32-bit vector (operand 1) to a 64-bit vector (operand 2), giving a
; 64-bit result.  It is split into add<vnsi>3_vcc for the low part and
; addc<vnsi>3 for the high part; the high addend is const0_rtx, so the high
; part is operand 2's high part plus the carry.
1755 (define_insn_and_split "add<mode>3_zext"
1756 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1759 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1760 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA")))
1761 (clobber (reg:DI VCC_REG))]
1764 "gcn_can_split_p (<MODE>mode, operands[0])
1765 && gcn_can_split_p (<MODE>mode, operands[2])"
1768 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1769 emit_insn (gen_add<vnsi>3_vcc
1770 (gcn_operand_part (<MODE>mode, operands[0], 0),
1772 gcn_operand_part (<MODE>mode, operands[2], 0),
1774 emit_insn (gen_addc<vnsi>3
1775 (gcn_operand_part (<MODE>mode, operands[0], 1),
1776 gcn_operand_part (<MODE>mode, operands[2], 1),
1777 const0_rtx, vcc, vcc));
1780 [(set_attr "type" "vmult")
1781 (set_attr "length" "8")])
; Exec-masked form of add<mode>3_zext: operand 3 is the merge value for
; inactive lanes and operand 4 is the exec register.  It is split into
; add<vnsi>3_vcc_exec (low part) and addc<vnsi>3_exec (high part, with
; const0_rtx as the second addend).
1783 (define_insn_and_split "add<mode>3_zext_exec"
1784 [(set (match_operand:V_DI 0 "register_operand" "= v, v")
1788 (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
1789 (match_operand:V_DI 2 "gcn_alu_operand" "vDb,vDA"))
1790 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
1791 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
1792 (clobber (reg:DI VCC_REG))]
1795 "gcn_can_split_p (<MODE>mode, operands[0])
1796 && gcn_can_split_p (<MODE>mode, operands[2])
1797 && gcn_can_split_p (<MODE>mode, operands[3])"
1800 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1801 emit_insn (gen_add<vnsi>3_vcc_exec
1802 (gcn_operand_part (<MODE>mode, operands[0], 0),
1804 gcn_operand_part (<MODE>mode, operands[2], 0),
1806 gcn_operand_part (<MODE>mode, operands[3], 0),
1808 emit_insn (gen_addc<vnsi>3_exec
1809 (gcn_operand_part (<MODE>mode, operands[0], 1),
1810 gcn_operand_part (<MODE>mode, operands[2], 1),
1811 const0_rtx, vcc, vcc,
1812 gcn_operand_part (<MODE>mode, operands[3], 1),
1816 [(set_attr "type" "vmult")
1817 (set_attr "length" "8")])
; Add an SI scalar (operand 1, duplicated across lanes) to a 64-bit vector
; (operand 2), with an explicit carry register (DI operand 3).  It is split
; into add<vnsi>3_vcc_dup for the low part and addc<vnsi>3 for the high part,
; using operand 3 as both carry-in and carry-out of the second instruction.
1819 (define_insn_and_split "add<mode>3_vcc_zext_dup"
1820 [(set (match_operand:V_DI 0 "register_operand")
1823 (vec_duplicate:<VnSI>
1824 (match_operand:SI 1 "gcn_alu_operand")))
1825 (match_operand:V_DI 2 "gcn_alu_operand")))
1826 (set (match_operand:DI 3 "register_operand")
1828 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1832 {@ [cons: =0, 1, 2, =3]
1836 "gcn_can_split_p (<MODE>mode, operands[0])
1837 && gcn_can_split_p (<MODE>mode, operands[2])"
1840 emit_insn (gen_add<vnsi>3_vcc_dup
1841 (gcn_operand_part (<MODE>mode, operands[0], 0),
1842 gcn_operand_part (DImode, operands[1], 0),
1843 gcn_operand_part (<MODE>mode, operands[2], 0),
1845 emit_insn (gen_addc<vnsi>3
1846 (gcn_operand_part (<MODE>mode, operands[0], 1),
1847 gcn_operand_part (<MODE>mode, operands[2], 1),
1848 const0_rtx, operands[3], operands[3]));
1851 [(set_attr "type" "vmult")
1852 (set_attr "length" "8")])
; Convenience expander for add<mode>3_vcc_zext_dup: creates the hard VCC
; register rtx locally and emits the _vcc_ pattern.
1854 (define_expand "add<mode>3_zext_dup"
1855 [(match_operand:V_DI 0 "register_operand")
1856 (match_operand:SI 1 "gcn_alu_operand")
1857 (match_operand:V_DI 2 "gcn_alu_operand")]
1860 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1861 emit_insn (gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
; Exec-masked form of add<mode>3_vcc_zext_dup.  Operand 4 is the merge value
; for inactive lanes, operand 5 is the exec register and operand 3 is the
; carry register (earlyclobber in both alternatives).  It is split into
; add<vnsi>3_vcc_dup_exec for the low part and addc<vnsi>3_exec for the high
; part.
1866 (define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
1867 [(set (match_operand:V_DI 0 "register_operand")
1871 (vec_duplicate:<VnSI>
1872 (match_operand:SI 1 "gcn_alu_operand")))
1873 (match_operand:V_DI 2 "gcn_alu_operand"))
1874 (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
1875 (match_operand:DI 5 "gcn_exec_reg_operand")))
1876 (set (match_operand:DI 3 "register_operand")
1879 (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
1884 {@ [cons: =0, 1, 2, =3, 4, 5]
1885 [v,ASv,v,&Sg,U0,e] #
1886 [v,BSv,v,&cV,U0,e] ^
1888 "gcn_can_split_p (<MODE>mode, operands[0])
1889 && gcn_can_split_p (<MODE>mode, operands[2])
1890 && gcn_can_split_p (<MODE>mode, operands[4])"
1893 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1894 (gcn_operand_part (<MODE>mode, operands[0], 0),
1895 gcn_operand_part (DImode, operands[1], 0),
1896 gcn_operand_part (<MODE>mode, operands[2], 0),
1898 gcn_operand_part (<MODE>mode, operands[4], 0),
1900 emit_insn (gen_addc<vnsi>3_exec
1901 (gcn_operand_part (<MODE>mode, operands[0], 1),
1902 gcn_operand_part (<MODE>mode, operands[2], 1),
1903 const0_rtx, operands[3], operands[3],
1904 gcn_operand_part (<MODE>mode, operands[4], 1),
1908 [(set_attr "type" "vmult")
1909 (set_attr "length" "8")])
; Convenience expander for add<mode>3_vcc_zext_dup_exec: creates the hard VCC
; register rtx locally and passes it as the carry operand.  Note the operand
; renumbering: this expander's operand 3 (merge value) is passed after the
; carry register in the call below.
1911 (define_expand "add<mode>3_zext_dup_exec"
1912 [(match_operand:V_DI 0 "register_operand")
1913 (match_operand:SI 1 "gcn_alu_operand")
1914 (match_operand:V_DI 2 "gcn_alu_operand")
1915 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
1916 (match_operand:DI 4 "gcn_exec_reg_operand")]
1919 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1920 emit_insn (gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
1921 operands[2], vcc, operands[3],
; Add a zero-extended 32-bit vector (operand 1) to a duplicated DI scalar
; (operand 2), with an explicit carry register (DI operand 3).  The split
; emits add<vnsi>3_vcc_dup for the low part, broadcasts the high half of the
; scalar into the high destination register with vec_duplicate, and then
; adds the carry to it with addc<vnsi>3 (the other addend is const0_rtx).
1926 (define_insn_and_split "add<mode>3_vcc_zext_dup2"
1927 [(set (match_operand:V_DI 0 "register_operand")
1929 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
1930 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"))))
1931 (set (match_operand:DI 3 "register_operand")
1933 (zero_extend:V_DI (match_dup 1))
1934 (vec_duplicate:V_DI (match_dup 2)))
1937 {@ [cons: =0, 1, 2, =3]
1941 "gcn_can_split_p (<MODE>mode, operands[0])"
1944 emit_insn (gen_add<vnsi>3_vcc_dup
1945 (gcn_operand_part (<MODE>mode, operands[0], 0),
1946 gcn_operand_part (DImode, operands[2], 0),
1949 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
1950 emit_insn (gen_vec_duplicate<vnsi>
1951 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
1952 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, const0_rtx, operands[3],
1956 [(set_attr "type" "vmult")
1957 (set_attr "length" "8")])
; Convenience expander for add<mode>3_vcc_zext_dup2: creates the hard VCC
; register rtx locally and emits the _vcc_ pattern.
1959 (define_expand "add<mode>3_zext_dup2"
1960 [(match_operand:V_DI 0 "register_operand")
1961 (match_operand:<VnSI> 1 "gcn_alu_operand")
1962 (match_operand:DI 2 "gcn_alu_operand")]
1965 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
1966 emit_insn (gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
; Exec-masked form of add<mode>3_vcc_zext_dup2.  Operand 4 is the merge value
; for inactive lanes, operand 5 is the exec register and operand 3 is the
; carry register (earlyclobber in both alternatives).  The split uses the
; _exec variants of add_vcc_dup, vec_duplicate and addc, merging with the
; matching part of operand 4.
1971 (define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
1972 [(set (match_operand:V_DI 0 "register_operand")
1975 (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand"))
1976 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand")))
1977 (match_operand:V_DI 4 "gcn_register_or_unspec_operand")
1978 (match_operand:DI 5 "gcn_exec_reg_operand")))
1979 (set (match_operand:DI 3 "register_operand")
1982 (zero_extend:V_DI (match_dup 1))
1983 (vec_duplicate:V_DI (match_dup 2)))
1987 {@ [cons: =0, 1, 2, =3, 4, 5]
1988 [v,v,ASv,&Sg,U0,e] #
1989 [v,v,BSv,&cV,U0,e] ^
1991 "gcn_can_split_p (<MODE>mode, operands[0])
1992 && gcn_can_split_p (<MODE>mode, operands[4])"
1995 emit_insn (gen_add<vnsi>3_vcc_dup_exec
1996 (gcn_operand_part (<MODE>mode, operands[0], 0),
1997 gcn_operand_part (DImode, operands[2], 0),
2000 gcn_operand_part (<MODE>mode, operands[4], 0),
2002 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
2003 emit_insn (gen_vec_duplicate<vnsi>_exec
2004 (dsthi, gcn_operand_part (DImode, operands[2], 1),
2005 gcn_operand_part (<MODE>mode, operands[4], 1),
2007 emit_insn (gen_addc<vnsi>3_exec
2008 (dsthi, dsthi, const0_rtx, operands[3], operands[3],
2009 gcn_operand_part (<MODE>mode, operands[4], 1),
2013 [(set_attr "type" "vmult")
2014 (set_attr "length" "8")])
; Convenience expander for add<mode>3_vcc_zext_dup2_exec: creates the hard
; VCC register rtx locally, then forwards the merge value (operand 3) and
; exec (operand 4).
2016 (define_expand "add<mode>3_zext_dup2_exec"
2017 [(match_operand:V_DI 0 "register_operand")
2018 (match_operand:<VnSI> 1 "gcn_alu_operand")
2019 (match_operand:DI 2 "gcn_alu_operand")
2020 (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
2021 (match_operand:DI 4 "gcn_exec_reg_operand")]
2024 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2025 emit_insn (gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
2027 operands[3], operands[4]));
; Add a sign-extended 32-bit vector (operand 1) to a duplicated DI scalar
; (operand 2).  Operand 3 is an earlyclobber <VnSI> scratch.  The split:
;   1. arithmetic-shifts operand 1 right by 31 into the scratch (the sign
;      word of each lane);
;   2. adds the low parts with add<vnsi>3_vcc_dup;
;   3. broadcasts the high half of the scalar into the high destination;
;   4. adds the scratch and the VCC carry to it with addc<vnsi>3.
2031 (define_insn_and_split "add<mode>3_sext_dup2"
2032 [(set (match_operand:V_DI 0 "register_operand" "= v")
2034 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
2035 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
2036 (clobber (match_scratch:<VnSI> 3 "=&v"))
2037 (clobber (reg:DI VCC_REG))]
2040 "gcn_can_split_p (<MODE>mode, operands[0])"
2043 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2044 emit_insn (gen_ashr<vnsi>3 (operands[3], operands[1], GEN_INT (31)));
2045 emit_insn (gen_add<vnsi>3_vcc_dup
2046 (gcn_operand_part (<MODE>mode, operands[0], 0),
2047 gcn_operand_part (DImode, operands[2], 0),
2050 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
2051 emit_insn (gen_vec_duplicate<vnsi>
2052 (dsthi, gcn_operand_part (DImode, operands[2], 1)));
2053 emit_insn (gen_addc<vnsi>3 (dsthi, dsthi, operands[3], vcc, vcc));
2056 [(set_attr "type" "vmult")
2057 (set_attr "length" "8")])
; Exec-masked form of add<mode>3_sext_dup2.  Operand 3 is the merge value for
; inactive lanes, operand 4 is the exec register and operand 5 is an
; earlyclobber <VnSI> scratch.  The split follows the same four steps using
; the _exec variants; the shift into the scratch merges with an undefined
; value (gcn_gen_undef).
2059 (define_insn_and_split "add<mode>3_sext_dup2_exec"
2060 [(set (match_operand:V_DI 0 "register_operand" "= v")
2063 (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
2064 (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
2065 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2066 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2067 (clobber (match_scratch:<VnSI> 5 "=&v"))
2068 (clobber (reg:DI VCC_REG))]
2071 "gcn_can_split_p (<MODE>mode, operands[0])
2072 && gcn_can_split_p (<MODE>mode, operands[3])"
2075 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2076 emit_insn (gen_ashr<vnsi>3_exec (operands[5], operands[1], GEN_INT (31),
2077 gcn_gen_undef (<VnSI>mode), operands[4]));
2078 emit_insn (gen_add<vnsi>3_vcc_dup_exec
2079 (gcn_operand_part (<MODE>mode, operands[0], 0),
2080 gcn_operand_part (DImode, operands[2], 0),
2083 gcn_operand_part (<MODE>mode, operands[3], 0),
2085 rtx dsthi = gcn_operand_part (<MODE>mode, operands[0], 1);
2086 emit_insn (gen_vec_duplicate<vnsi>_exec
2087 (dsthi, gcn_operand_part (DImode, operands[2], 1),
2088 gcn_operand_part (<MODE>mode, operands[3], 1),
2090 emit_insn (gen_addc<vnsi>3_exec
2091 (dsthi, dsthi, operands[5], vcc, vcc,
2092 gcn_operand_part (<MODE>mode, operands[3], 1),
2096 [(set_attr "type" "vmult")
2097 (set_attr "length" "8")])
2100 ;; {{{ DS memory ALU: add/sub
;; Modes accepted by the DS-memory add/sub patterns: DS_ARITH_MODE drives
;; the vector (<exec>) patterns, DS_ARITH_SCALAR_MODE the _scalar ones.
2102 (define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
2103 (define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
2105 ;; FIXME: the vector patterns probably need RD expanded to a vector of
2106 ;; addresses. For now, the only way a vector can get into LDS is
2107 ;; if the user puts it there manually.
2109 ;; FIXME: the scalar patterns are probably fine in themselves, but need to be
2110 ;; checked to see if anything can ever use them.
;; In-place vector add on a DS memory operand.  The insn condition requires
;; operands 0 and 1 to be the same rtx; operand 2 is a register ("v").
;; Emits a single ds_add.
2112 (define_insn "add<mode>3_ds<exec>"
2113 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2115 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
2116 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
2117 "rtx_equal_p (operands[0], operands[1])"
2118 "ds_add%u0\t%A0, %2%O0"
2119 [(set_attr "type" "ds")
2120 (set_attr "length" "8")])
;; Scalar counterpart of the DS in-place add: operands 0 and 1 must be the
;; same DS memory rtx, operand 2 is a register ("v").  Emits ds_add.
2122 (define_insn "add<mode>3_ds_scalar"
2123 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2124 (plus:DS_ARITH_SCALAR_MODE
2125 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2127 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
2128 "rtx_equal_p (operands[0], operands[1])"
2129 "ds_add%u0\t%A0, %2%O0"
2130 [(set_attr "type" "ds")
2131 (set_attr "length" "8")])
;; In-place vector subtract on a DS memory operand: memory (operand 1, which
;; must equal operand 0) minus register operand 2.  Emits ds_sub.
2133 (define_insn "sub<mode>3_ds<exec>"
2134 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2135 (minus:DS_ARITH_MODE
2136 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
2137 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
2138 "rtx_equal_p (operands[0], operands[1])"
2139 "ds_sub%u0\t%A0, %2%O0"
2140 [(set_attr "type" "ds")
2141 (set_attr "length" "8")])
;; Scalar counterpart of the DS in-place subtract: memory (operand 1, which
;; must equal operand 0) minus register operand 2.  Emits ds_sub.
2143 (define_insn "sub<mode>3_ds_scalar"
2144 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2145 (minus:DS_ARITH_SCALAR_MODE
2146 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2148 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
2149 "rtx_equal_p (operands[0], operands[1])"
2150 "ds_sub%u0\t%A0, %2%O0"
2151 [(set_attr "type" "ds")
2152 (set_attr "length" "8")])
;; Reversed in-place vector subtract on a DS memory operand: register
;; operand 2 minus memory (operand 1, which must equal operand 0).
;; Emits ds_rsub.
2154 (define_insn "subr<mode>3_ds<exec>"
2155 [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
2156 (minus:DS_ARITH_MODE
2157 (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
2158 (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
2159 "rtx_equal_p (operands[0], operands[1])"
2160 "ds_rsub%u0\t%A0, %2%O0"
2161 [(set_attr "type" "ds")
2162 (set_attr "length" "8")])
;; Scalar counterpart of the reversed DS subtract: register operand 2 minus
;; memory (operand 1, which must equal operand 0).  Emits ds_rsub.
2164 (define_insn "subr<mode>3_ds_scalar"
2165 [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
2166 (minus:DS_ARITH_SCALAR_MODE
2167 (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
2168 (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
2170 "rtx_equal_p (operands[0], operands[1])"
2171 "ds_rsub%u0\t%A0, %2%O0"
2172 [(set_attr "type" "ds")
2173 (set_attr "length" "8")])
2176 ;; {{{ ALU special case: mult
;; High-part multiply for V_SI vectors; emits v_mul_hi<sgnsuffix>0 with the
;; source operands swapped (%2, %1).
;; NOTE(review): <su>/<sgnsuffix> presumably select the signed/unsigned
;; variant; confirm against their attribute definitions.
2178 (define_insn "<su>mul<mode>3_highpart<exec>"
2179 [(set (match_operand:V_SI 0 "register_operand" "= v")
2184 (match_operand:V_SI 1 "gcn_alu_operand" " %v"))
2186 (match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
2189 "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
2190 [(set_attr "type" "vop3a")
2191 (set_attr "length" "8")])
;; Element-wise multiply for single-register integer vector modes.  Emits
;; v_mul_lo_u32 (VOP3A encoding, length 8).
2193 (define_insn "mul<mode>3<exec>"
2194 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
2196 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
2197 (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
2199 "v_mul_lo_u32\t%0, %1, %2"
2200 [(set_attr "type" "vop3a")
2201 (set_attr "length" "8")])
;; Multiply of a single-register integer vector (operand 1) by a scalar
;; (operand 2, in <SCALAR_MODE>) broadcast with vec_duplicate.  Emits
;; v_mul_lo_u32.
2203 (define_insn "mul<mode>3_dup<exec>"
2204 [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
2206 (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
2207 (vec_duplicate:V_INT_1REG
2208 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
2210 "v_mul_lo_u32\t%0, %1, %2"
2211 [(set_attr "type" "vop3a")
2212 (set_attr "length" "8")])
;; V_DI multiply, split into 32-bit operations on the low/high register
;; parts (operand 3 is a <VnSI> scratch).  Writing a = ah:al and b = bh:bl,
;; the low 64 bits of the product are
;;   lo = lo32 (al * bl)
;;   hi = hi32 (al * bl) + lo32 (ah * bl) + lo32 (al * bh)
;; The ah * bh partial product is weighted by 2^64 and cannot affect the
;; result, so it must not be added into the high word.
2214 (define_insn_and_split "mul<mode>3"
2215 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2217 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
2218 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
2219 (clobber (match_scratch:<VnSI> 3 "=&v"))]
2225 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2226 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2227 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
2228 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
2229 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2230 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2231 rtx tmp = operands[3];
2233 emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
2234 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));
2235 emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
2236 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
2237 emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
2238 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked V_DI multiply, split into 32-bit operations on the low/high
;; register parts.  Operand 3 supplies the previous value for inactive lanes
;; (undef when it is an UNSPEC), operand 4 is EXEC, operand 5 a <VnSI>
;; scratch.  Writing a = ah:al and b = bh:bl, the low 64 bits of the product
;; are
;;   lo = lo32 (al * bl)
;;   hi = hi32 (al * bl) + lo32 (ah * bl) + lo32 (al * bh)
;; The ah * bh partial product is weighted by 2^64 and cannot affect the
;; result, so it must not be added into the high word.
2244 (define_insn_and_split "mul<mode>3_exec"
2245 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2248 (match_operand:V_DI 1 "gcn_alu_operand" "% v")
2249 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2250 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2251 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2252 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2258 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2259 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2260 rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
2261 rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
2262 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2263 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2264 rtx exec = operands[4];
2265 rtx tmp = operands[5];
2268 if (GET_CODE (operands[3]) == UNSPEC)
2270 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2274 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2275 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2278 rtx undef = gcn_gen_undef (<VnSI>mode);
2280 emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
2281 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
2283 emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
2284 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
2285 emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
2286 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; V_DI multiply where the left operand is a <VnSI> vector (zero-extended,
;; per the pattern name).  The left operand has no high word, so the split
;; needs only one cross term:
;;   out_lo = left * right_lo
;;   out_hi = highpart (left * right_lo) + left * right_hi
;; Operand 3 is a <VnSI> scratch.
2292 (define_insn_and_split "mul<mode>3_zext"
2293 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2296 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2297 (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
2298 (clobber (match_scratch:<VnSI> 3 "=&v"))]
2304 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2305 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2306 rtx left = operands[1];
2307 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2308 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2309 rtx tmp = operands[3];
2311 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2312 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2313 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2314 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked form of the <VnSI>-by-V_DI multiply.  Operand 3 supplies the
;; previous value (its low/high parts, or undef when it is an UNSPEC),
;; operand 4 is EXEC, and operand 5 is a <VnSI> scratch:
;;   out_lo = left * right_lo
;;   out_hi = highpart (left * right_lo) + left * right_hi
2318 (define_insn_and_split "mul<mode>3_zext_exec"
2319 [(set (match_operand:V_DI 0 "register_operand" "=&v")
2323 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2324 (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
2325 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2326 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2327 (clobber (match_scratch:<VnSI> 5 "=&v"))]
2333 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2334 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2335 rtx left = operands[1];
2336 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2337 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2338 rtx exec = operands[4];
2339 rtx tmp = operands[5];
2342 if (GET_CODE (operands[3]) == UNSPEC)
2344 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2348 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2349 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2352 rtx undef = gcn_gen_undef (<VnSI>mode);
2354 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2355 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2357 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2358 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; V_DI multiply of a <VnSI> vector (operand 1) by a DI scalar (operand 2;
;; "dup2" in the name indicates it is the broadcast operand).  The split
;; takes the low/high parts of the scalar and emits:
;;   out_lo = left * right_lo
;;   out_hi = highpart (left * right_lo) + left * right_hi
;; NOTE(review): operand 2 is DImode but its parts are requested with
;; <MODE>mode (the vector mode); confirm gcn_operand_part handles this.
2362 (define_insn_and_split "mul<mode>3_zext_dup2"
2363 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2366 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2368 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
2369 (clobber (match_scratch:<VnSI> 3 "= &v"))]
2375 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2376 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2377 rtx left = operands[1];
2378 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2379 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2380 rtx tmp = operands[3];
2382 emit_insn (gen_mul<vnsi>3 (out_lo, left, right_lo));
2383 emit_insn (gen_umul<vnsi>3_highpart (out_hi, left, right_lo));
2384 emit_insn (gen_mul<vnsi>3 (tmp, left, right_hi));
2385 emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
;; EXEC-masked form of the <VnSI>-vector by DI-scalar multiply.  Operand 3
;; supplies the previous value (its low/high parts, or undef when it is an
;; UNSPEC), operand 4 is EXEC, and operand 5 is a <VnSI> scratch:
;;   out_lo = left * right_lo
;;   out_hi = highpart (left * right_lo) + left * right_hi
;; NOTE(review): operand 2 is DImode but its parts are requested with
;; <MODE>mode (the vector mode); confirm gcn_operand_part handles this.
2389 (define_insn_and_split "mul<mode>3_zext_dup2_exec"
2390 [(set (match_operand:V_DI 0 "register_operand" "= &v")
2394 (match_operand:<VnSI> 1 "gcn_alu_operand" " v"))
2396 (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
2397 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2398 (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
2399 (clobber (match_scratch:<VnSI> 5 "= &v"))]
2405 rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
2406 rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
2407 rtx left = operands[1];
2408 rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
2409 rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
2410 rtx exec = operands[4];
2411 rtx tmp = operands[5];
2414 if (GET_CODE (operands[3]) == UNSPEC)
2416 old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
2420 old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
2421 old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
2424 rtx undef = gcn_gen_undef (<VnSI>mode);
2426 emit_insn (gen_mul<vnsi>3_exec (out_lo, left, right_lo, old_lo, exec));
2427 emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left, right_lo,
2429 emit_insn (gen_mul<vnsi>3_exec (tmp, left, right_hi, undef, exec));
2430 emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
;; Iterator over the two complex-multiply unspecs.  conj_op gives the
;; pattern-name suffix; cmul_subadd / cmul_addsub name the operation the
;; cmul expander applies to the even / odd lanes respectively.
2434 (define_int_iterator UNSPEC_CMUL_OP [UNSPEC_CMUL UNSPEC_CMUL_CONJ])
2435 (define_int_attr conj_op [(UNSPEC_CMUL "") (UNSPEC_CMUL_CONJ "_conj")])
2436 (define_int_attr cmul_subadd [(UNSPEC_CMUL "sub") (UNSPEC_CMUL_CONJ "add")])
2437 (define_int_attr cmul_addsub [(UNSPEC_CMUL "add") (UNSPEC_CMUL_CONJ "sub")])
;; Complex-multiply expander.  Following the inline lane annotations, each
;; lane pair of operand 1 is (a, b) and of operand 2 is (c, d).  It forms
;; the element-wise products and their pair-swapped copies, then writes the
;; destination in two EXEC-masked steps: even lanes (mask
;; 0x5555555555555555) with <cmul_subadd> of t1 and its swap, odd lanes
;; (mask 0xaaaaaaaaaaaaaaaa) with <cmul_addsub> of t2 and its swap, merging
;; into the even-lane result.
2439 (define_expand "cmul<conj_op><mode>3"
2440 [(set (match_operand:V_noHI 0 "register_operand" "=&v")
2442 [(match_operand:V_noHI 1 "register_operand" "v")
2443 (match_operand:V_noHI 2 "register_operand" "v")]
2449 rtx t1 = gen_reg_rtx (<MODE>mode);
2450 emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2])); // a*c b*d
2452 rtx s2_perm = gen_reg_rtx (<MODE>mode);
2453 emit_insn (gen_dpp_swap_pairs<mode> (s2_perm, operands[2])); // d c
2455 rtx t2 = gen_reg_rtx (<MODE>mode);
2456 emit_insn (gen_mul<mode>3 (t2, operands[1], s2_perm)); // a*d b*c
2458 rtx t1_perm = gen_reg_rtx (<MODE>mode);
2459 emit_insn (gen_dpp_swap_pairs<mode> (t1_perm, t1)); // b*d a*c
2461 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2462 emit_move_insn (even, get_exec (0x5555555555555555UL));
2463 rtx dest = operands[0];
2464 emit_insn (gen_<cmul_subadd><mode>3_exec (dest, t1, t1_perm,
2465 gcn_gen_undef (<MODE>mode),
2466 even)); // a*c-b*d 0
2468 rtx t2_perm = gen_reg_rtx (<MODE>mode);
2469 emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*c a*d
2471 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2472 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2473 emit_insn (gen_<cmul_addsub><mode>3_exec (dest, t2, t2_perm, dest, odd));
;; plus/minus iterator; addsub_as supplies the one-letter suffix ("a"/"s")
;; used to build the cml<addsub_as> and fm<addsub_as> pattern names.
2478 (define_code_iterator addsub [plus minus])
2479 (define_code_attr addsub_as [(plus "a") (minus "s")])
;; Complex multiply-accumulate expander for FP vectors.  Steps, as emitted:
;;   a  = even elements of operand 1 distributed across each pair
;;   t1 = fm<addsub_as> (a, operand 2, operand 3)
;;   b  = odd elements of operand 1 distributed across each pair
;;   t2 = b * operand 2, then pair-swapped into t2_perm
;;   even lanes (mask 0x5555555555555555): dest = t1 - t2_perm
;;   odd lanes  (mask 0xaaaaaaaaaaaaaaaa): dest = t1 + t2_perm
2481 (define_expand "cml<addsub_as><mode>4"
2482 [(set (match_operand:V_FP 0 "register_operand" "=&v")
2485 [(match_operand:V_FP 1 "register_operand" "v")
2486 (match_operand:V_FP 2 "register_operand" "v")]
2488 (match_operand:V_FP 3 "register_operand" "v")))]
2491 rtx a = gen_reg_rtx (<MODE>mode);
2492 emit_insn (gen_dpp_distribute_even<mode> (a, operands[1])); // a a
2494 rtx t1 = gen_reg_rtx (<MODE>mode);
2495 emit_insn (gen_fm<addsub_as><mode>4 (t1, a, operands[2], operands[3]));
2498 rtx b = gen_reg_rtx (<MODE>mode);
2499 emit_insn (gen_dpp_distribute_odd<mode> (b, operands[1])); // b b
2501 rtx t2 = gen_reg_rtx (<MODE>mode);
2502 emit_insn (gen_mul<mode>3 (t2, b, operands[2])); // b*c b*d
2504 rtx t2_perm = gen_reg_rtx (<MODE>mode);
2505 emit_insn (gen_dpp_swap_pairs<mode> (t2_perm, t2)); // b*d b*c
2507 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2508 emit_move_insn (even, get_exec (0x5555555555555555UL));
2509 rtx dest = operands[0];
2510 emit_insn (gen_sub<mode>3_exec (dest, t1, t2_perm,
2511 gcn_gen_undef (<MODE>mode), even));
2513 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2514 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2515 emit_insn (gen_add<mode>3_exec (dest, t1, t2_perm, dest, odd));
;; Alternating subtract/add expander: even lanes (EXEC mask
;; 0x5555555555555555, the same value as the const_int 6148914691236517205
;; in the RTL template) receive x - y, odd lanes (mask 0xaaaaaaaaaaaaaaaa)
;; receive x + y, merged into the even-lane result.
2520 (define_expand "vec_addsub<mode>3"
2521 [(set (match_operand:V_noHI 0 "register_operand" "=&v")
2524 (match_operand:V_noHI 1 "register_operand" "v")
2525 (match_operand:V_noHI 2 "register_operand" "v"))
2526 (plus:V_noHI (match_dup 1) (match_dup 2))
2527 (const_int 6148914691236517205)))]
2530 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2531 emit_move_insn (even, get_exec (0x5555555555555555UL));
2532 rtx dest = operands[0];
2533 rtx x = operands[1];
2534 rtx y = operands[2];
2535 emit_insn (gen_sub<mode>3_exec (dest, x, y, gcn_gen_undef (<MODE>mode),
2537 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2538 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2539 emit_insn (gen_add<mode>3_exec (dest, x, y, dest, odd));
;; Iterator over the two complex-add-with-rotation unspecs.  rot gives the
;; pattern-name suffix; cadd_subadd / cadd_addsub name the operation the
;; cadd expander applies to the even / odd lanes respectively.
2544 (define_int_iterator CADD [UNSPEC_CADD90 UNSPEC_CADD270])
2545 (define_int_attr rot [(UNSPEC_CADD90 "90") (UNSPEC_CADD270 "270")])
2546 (define_int_attr cadd_subadd [(UNSPEC_CADD90 "sub") (UNSPEC_CADD270 "add")])
2547 (define_int_attr cadd_addsub [(UNSPEC_CADD90 "add") (UNSPEC_CADD270 "sub")])
;; Complex add with rotation expander.  Operand 2 is pair-swapped into y;
;; even lanes (mask 0x5555555555555555) then get <cadd_subadd> (x, y) and
;; odd lanes (mask 0xaaaaaaaaaaaaaaaa) get <cadd_addsub> (x, y), merged into
;; the even-lane result.
2549 (define_expand "cadd<rot><mode>3"
2550 [(set (match_operand:V_noHI 0 "register_operand" "=&v")
2551 (unspec:V_noHI [(match_operand:V_noHI 1 "register_operand" "v")
2552 (match_operand:V_noHI 2 "register_operand" "v")]
2556 rtx dest = operands[0];
2557 rtx x = operands[1];
2558 rtx y = gen_reg_rtx (<MODE>mode);
2559 emit_insn (gen_dpp_swap_pairs<mode> (y, operands[2]));
2561 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2562 emit_move_insn (even, get_exec (0x5555555555555555UL));
2563 emit_insn (gen_<cadd_subadd><mode>3_exec (dest, x, y,
2564 gcn_gen_undef (<MODE>mode),
2566 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2567 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2568 emit_insn (gen_<cadd_addsub><mode>3_exec (dest, x, y, dest, odd));
;; Expander computing t1 = operand 1 * operand 2, then writing t1 - operand 3
;; to the even lanes (mask 0x5555555555555555) and t1 + operand 3 to the odd
;; lanes (mask 0xaaaaaaaaaaaaaaaa).  The multiply and the add/sub are emitted
;; as separate operations.
2573 (define_expand "vec_fmaddsub<mode>4"
2574 [(match_operand:V_noHI 0 "register_operand" "=&v")
2575 (match_operand:V_noHI 1 "register_operand" "v")
2576 (match_operand:V_noHI 2 "register_operand" "v")
2577 (match_operand:V_noHI 3 "register_operand" "v")]
2580 rtx t1 = gen_reg_rtx (<MODE>mode);
2581 emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2]));
2582 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2583 emit_move_insn (even, get_exec (0x5555555555555555UL));
2584 rtx dest = operands[0];
2585 emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3],
2586 gcn_gen_undef (<MODE>mode), even));
2587 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2588 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2589 emit_insn (gen_add<mode>3_exec (dest, t1, operands[3], dest, odd));
;; Expander computing t1 = operand 1 * operand 2, then writing t1 + operand 3
;; to the even lanes (mask 0x5555555555555555) and t1 - operand 3 to the odd
;; lanes (mask 0xaaaaaaaaaaaaaaaa).  The multiply and the add/sub are emitted
;; as separate operations.
2594 (define_expand "vec_fmsubadd<mode>4"
2595 [(match_operand:V_noHI 0 "register_operand" "=&v")
2596 (match_operand:V_noHI 1 "register_operand" "v")
2597 (match_operand:V_noHI 2 "register_operand" "v")
2598 (match_operand:V_noHI 3 "register_operand" "v")]
2601 rtx t1 = gen_reg_rtx (<MODE>mode);
2602 emit_insn (gen_mul<mode>3 (t1, operands[1], operands[2]));
2603 rtx even = gen_rtx_REG (DImode, EXEC_REG);
2604 emit_move_insn (even, get_exec (0x5555555555555555UL));
2605 rtx dest = operands[0];
2606 emit_insn (gen_add<mode>3_exec (dest, t1, operands[3],
2607 gcn_gen_undef (<MODE>mode), even));
2608 rtx odd = gen_rtx_REG (DImode, EXEC_REG);
2609 emit_move_insn (odd, get_exec (0xaaaaaaaaaaaaaaaaUL));
2610 emit_insn (gen_sub<mode>3_exec (dest, t1, operands[3], dest, odd));
2616 ;; {{{ ALU generic case
;; RTL code groups used by the generic integer ALU patterns: bitwise
;; operations, shifts, and signed/unsigned min/max.
2618 (define_code_iterator bitop [and ior xor])
2619 (define_code_iterator shiftop [ashift lshiftrt ashiftrt])
2620 (define_code_iterator minmaxop [smin smax umin umax])
;; Unary integer operation on single-register vector modes; emits
;; v_<mnemonic>0 in the VOP1 encoding.
2622 (define_insn "<expander><mode>2<exec>"
2623 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v")
2625 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
2627 "v_<mnemonic>0\t%0, %1"
2628 [(set_attr "type" "vop1")
2629 (set_attr "length" "8")])
;; Binary integer operation on single-register vector modes, with two
;; alternatives: a VALU form (v_<mnemonic>0, sources emitted as %2, %1) and
;; a DS-memory form (ds_<mnemonic>0) in which operand 1 is tied to the
;; destination by the "0" constraint.
2631 (define_insn "<expander><mode>3<exec>"
2632 [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand" "= v,RD")
2634 (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "% v, 0")
2635 (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2638 v_<mnemonic>0\t%0, %2, %1
2639 ds_<mnemonic>0\t%A0, %2%O0"
2640 [(set_attr "type" "vop2,ds")
2641 (set_attr "length" "8,8")])
;; V_DI bitwise operation.  After reload, and unless the destination is a
;; DS memory operand, it is split into two <VnSI> bitops on the low and
;; high parts (operands 3..8 are the parts of operands 0..2).  The DS
;; alternative is emitted directly as ds_<mnemonic>0.
2643 (define_insn_and_split "<expander><mode>3"
2644 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2646 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2647 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2651 ds_<mnemonic>0\t%A0, %2%O0"
2652 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2654 (bitop:<VnSI> (match_dup 5) (match_dup 7)))
2656 (bitop:<VnSI> (match_dup 6) (match_dup 8)))]
2658 operands[3] = gcn_operand_part (<MODE>mode, operands[0], 0);
2659 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 1);
2660 operands[5] = gcn_operand_part (<MODE>mode, operands[1], 0);
2661 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 1);
2662 operands[7] = gcn_operand_part (<MODE>mode, operands[2], 0);
2663 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 1);
2665 [(set_attr "type" "vmult,ds")
2666 (set_attr "length" "16,8")])
;; EXEC-masked V_DI bitwise operation.  The insn condition accepts a
;; non-memory destination, or a memory destination that equals operand 1
;; with a register operand 2.  After reload, and unless the destination is a
;; DS memory operand, it is split into two <VnSI> bitops on the low and high
;; parts; operands 5..12 hold the parts of operands 0..3.
2668 (define_insn_and_split "<expander><mode>3_exec"
2669 [(set (match_operand:V_DI 0 "gcn_valu_dst_operand" "= v,RD")
2672 (match_operand:V_DI 1 "gcn_valu_src0_operand" "% v,RD")
2673 (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
2674 (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
2675 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
2676 "!memory_operand (operands[0], VOIDmode)
2677 || (rtx_equal_p (operands[0], operands[1])
2678 && register_operand (operands[2], VOIDmode))"
2681 ds_<mnemonic>0\t%A0, %2%O0"
2682 "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
2685 (bitop:<VnSI> (match_dup 7) (match_dup 9))
2690 (bitop:<VnSI> (match_dup 8) (match_dup 10))
2694 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 0);
2695 operands[6] = gcn_operand_part (<MODE>mode, operands[0], 1);
2696 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 0);
2697 operands[8] = gcn_operand_part (<MODE>mode, operands[1], 1);
2698 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 0);
2699 operands[10] = gcn_operand_part (<MODE>mode, operands[2], 1);
2700 operands[11] = gcn_operand_part (<MODE>mode, operands[3], 0);
2701 operands[12] = gcn_operand_part (<MODE>mode, operands[3], 1);
2703 [(set_attr "type" "vmult,ds")
2704 (set_attr "length" "16,8")])
;; Shift of a QI/HI-element vector by a scalar SI amount.  The vector is
;; widened to <VnSI>, shifted there, and the result converted back.
;; unsignedp is true only for lshiftrt, so the conversions are unsigned for
;; logical right shifts and signed otherwise.
;; NOTE(review): the local enum presumably exists so that the substituted
;; <code> name can be compared as a plain C identifier; confirm.
2706 (define_expand "<expander><mode>3"
2707 [(set (match_operand:V_QIHI 0 "register_operand" "= v")
2709 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2710 (vec_duplicate:V_QIHI
2711 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2714 enum {ashift, lshiftrt, ashiftrt};
2715 bool unsignedp = (<code> == lshiftrt);
2716 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2717 rtx insi2 = gen_reg_rtx (SImode);
2718 rtx outsi = gen_reg_rtx (<VnSI>mode);
2720 convert_move (insi1, operands[1], unsignedp);
2721 convert_move (insi2, operands[2], unsignedp);
2722 emit_insn (gen_<expander><vnsi>3 (outsi, insi1, insi2));
2723 convert_move (operands[0], outsi, unsignedp);
;; Vector shift by a scalar SI amount (broadcast with vec_duplicate).
;; Emits v_<revmnemonic>0 with the shift amount (%2) ahead of the value
;; being shifted (%1).
2727 (define_insn "<expander><mode>3<exec>"
2728 [(set (match_operand:V_INT_noHI 0 "register_operand" "= v")
2730 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2731 (vec_duplicate:<VnSI>
2732 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
2734 "v_<revmnemonic>0\t%0, %2, %1"
2735 [(set_attr "type" "vop2")
2736 (set_attr "length" "8")])
;; Shift of a QI/HI-element vector by a vector of shift amounts.  Both
;; inputs are widened to <VnSI>, the <VnSI> shift is emitted, and the result
;; is converted back.  unsignedp is true only for lshiftrt.
2738 (define_expand "v<expander><mode>3"
2739 [(set (match_operand:V_QIHI 0 "register_operand" "=v")
2741 (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
2742 (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
2745 enum {ashift, lshiftrt, ashiftrt};
2746 bool unsignedp = (<code> == lshiftrt);
2747 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2748 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2749 rtx outsi = gen_reg_rtx (<VnSI>mode);
2751 convert_move (insi1, operands[1], unsignedp);
2752 convert_move (insi2, operands[2], unsignedp);
2753 emit_insn (gen_v<expander><vnsi>3 (outsi, insi1, insi2));
2754 convert_move (operands[0], outsi, unsignedp);
;; Vector shift by a <VnSI> vector of shift amounts.  Emits
;; v_<revmnemonic>0 with the amounts (%2) ahead of the value being shifted
;; (%1).
2758 (define_insn "v<expander><mode>3<exec>"
2759 [(set (match_operand:V_INT_noHI 0 "register_operand" "=v")
2761 (match_operand:V_INT_noHI 1 "gcn_alu_operand" " v")
2762 (match_operand:<VnSI> 2 "gcn_alu_operand" "vB")))]
2764 "v_<revmnemonic>0\t%0, %2, %1"
2765 [(set_attr "type" "vop2")
2766 (set_attr "length" "8")])
;; Min/max on QI/HI-element vectors.  Both inputs are widened to <VnSI>
;; (unsigned conversion for umin/umax, signed otherwise), the <VnSI>
;; <code> pattern is emitted, and the result is converted back.
2768 (define_expand "<expander><mode>3"
2769 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2771 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2772 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
2775 enum {smin, umin, smax, umax};
2776 bool unsignedp = (<code> == umax || <code> == umin);
2777 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2778 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2779 rtx outsi = gen_reg_rtx (<VnSI>mode);
2781 convert_move (insi1, operands[1], unsignedp);
2782 convert_move (insi2, operands[2], unsignedp);
2783 emit_insn (gen_<code><vnsi>3 (outsi, insi1, insi2));
2784 convert_move (operands[0], outsi, unsignedp);
;; EXEC-masked min/max on QI/HI-element vectors.  The inputs are widened to
;; <VnSI>, the masked <VnSI> operation is emitted with an undef merge
;; source, and the result is narrowed into a temporary.  A final
;; gen_mov<mode>_exec then merges the temporary with operand 3 under EXEC
;; into the destination.
2788 (define_expand "<expander><mode>3_exec"
2789 [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
2792 (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
2793 (match_operand:V_QIHI 2 "gcn_valu_src1com_operand"))
2794 (match_operand:V_QIHI 3 "gcn_register_or_unspec_operand" "U0")
2795 (match_operand:DI 4 "gcn_exec_reg_operand" "e")))]
2798 enum {smin, umin, smax, umax};
2799 bool unsignedp = (<code> == umax || <code> == umin);
2800 rtx insi1 = gen_reg_rtx (<VnSI>mode);
2801 rtx insi2 = gen_reg_rtx (<VnSI>mode);
2802 rtx outsi = gen_reg_rtx (<VnSI>mode);
2803 rtx out = operands[0];
2804 rtx exec = operands[4];
2805 rtx tmp = gen_reg_rtx (<MODE>mode);
2807 convert_move (insi1, operands[1], unsignedp);
2808 convert_move (insi2, operands[2], unsignedp);
2809 emit_insn (gen_<code><vnsi>3_exec (outsi, insi1, insi2,
2810 gcn_gen_undef(<VnSI>mode), exec));
2811 convert_move (tmp, outsi, unsignedp);
2812 emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
;; V_SI binary operation with two alternatives: a VALU form (v_<mnemonic>0,
;; sources emitted as %2, %1) and a DS-memory form (ds_<mnemonic>0) in
;; which operand 1 is tied to the destination by the "0" constraint.
2816 (define_insn "<expander><vnsi>3<exec>"
2817 [(set (match_operand:V_SI 0 "gcn_valu_dst_operand" "= v,RD")
2819 (match_operand:V_SI 1 "gcn_valu_src0_operand" "% v, 0")
2820 (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
2823 v_<mnemonic>0\t%0, %2, %1
2824 ds_<mnemonic>0\t%A0, %2%O0"
2825 [(set_attr "type" "vop2,ds")
2826 (set_attr "length" "8,8")])
;; V_DI min/max, implemented as compare plus select.  The split compares
;; operand 1 with operand 2 into VCC (LT/GT for smin/smax, LTU/GTU for
;; umin/umax), then emits gen_vcond_mask_<mode>di on operands 1 and 2 with
;; VCC as the mask.  VCC is clobbered.
2828 (define_insn_and_split "<expander><mode>3"
2829 [(set (match_operand:V_DI 0 "register_operand" "=v")
2831 (match_operand:V_DI 1 "gcn_alu_operand" " v")
2832 (match_operand:V_DI 2 "gcn_alu_operand" " v")))
2833 (clobber (reg:DI VCC_REG))]
2839 rtx out = operands[0];
2840 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2842 enum {smin, smax, umin, umax};
2843 bool minp = (<code> == smin || <code> == umin);
2844 if (<code> == smin || <code> == smax)
2845 emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LT (VOIDmode, 0, 0) :
2846 gen_rtx_GT (VOIDmode, 0, 0), operands[1],
2849 emit_insn (gen_vec_cmp<mode>di (vcc, minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
2850 gen_rtx_GTU (VOIDmode, 0, 0), operands[1],
2852 emit_insn (gen_vcond_mask_<mode>di (out, operands[1], operands[2], vcc));
2854 [(set_attr "type" "mult")])
;; EXEC-masked V_DI min/max.  The split compares operand 1 with operand 2
;; into VCC under EXEC (LT/GT for smin/smax, LTU/GTU for umin/umax), selects
;; between them into scratch operand 5 with gen_vcond_mask_<mode>di, and
;; then uses gen_mov<mode>_exec to merge the scratch with operand 3 under
;; EXEC into the destination.  VCC and the scratch are clobbered.
2856 (define_insn_and_split "<expander><mode>3_exec"
2857 [(set (match_operand:V_DI 0 "register_operand" "= v")
2860 (match_operand:V_DI 1 "gcn_alu_operand" " v")
2861 (match_operand:V_DI 2 "gcn_alu_operand" " v"))
2862 (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
2863 (match_operand:DI 4 "gcn_exec_reg_operand" "+e")))
2864 (clobber (match_scratch:<VnDI> 5 "= &v"))
2865 (clobber (reg:DI VCC_REG))]
2871 rtx out = operands[0];
2872 rtx vcc = gen_rtx_REG (DImode, VCC_REG);
2873 rtx exec = operands[4];
2874 rtx tmp = operands[5];
2876 enum {smin, smax, umin, umax};
2877 bool minp = (<code> == smin || <code> == umin);
2878 if (<code> == smin || <code> == smax)
2879 emit_insn (gen_vec_cmp<mode>di_exec (vcc,
2880 minp ? gen_rtx_LT (VOIDmode, 0, 0) :
2881 gen_rtx_GT (VOIDmode, 0, 0),
2882 operands[1], operands[2], exec));
2884 emit_insn (gen_vec_cmp<mode>di_exec (vcc,
2885 minp ? gen_rtx_LTU (VOIDmode, 0, 0) :
2886 gen_rtx_GTU (VOIDmode, 0, 0),
2887 operands[1], operands[2], exec));
2888 emit_insn (gen_vcond_mask_<mode>di (tmp, operands[1], operands[2], vcc));
2889 emit_insn (gen_mov<mode>_exec (out, tmp, operands[3], exec));
2891 [(set_attr "type" "mult")])
;; Integer vector negate, expanded through gen_sub<mode>3 with an all-zero
;; vector constant as the first source.
2896 (define_expand "neg<mode>2"
2897 [(match_operand:V_INT 0 "register_operand")
2898 (match_operand:V_INT 1 "register_operand")]
2901 emit_insn (gen_sub<mode>3 (operands[0], gcn_vec_constant (<MODE>mode, 0),
;; V_DI bitwise complement, split into two <VnSI> "not" operations on the
;; low and high parts (operands 3..6 are the parts of operands 0 and 1).
2906 (define_insn_and_split "one_cmpl<mode>2<exec>"
2907 [(set (match_operand:V_DI 0 "register_operand" "= v")
2909 (match_operand:V_DI 1 "gcn_alu_operand" "vSvDB")))]
2913 [(set (match_dup 3) (not:<VnSI> (match_dup 5)))
2914 (set (match_dup 4) (not:<VnSI> (match_dup 6)))]
2916 operands[3] = gcn_operand_part (<VnDI>mode, operands[0], 0);
2917 operands[4] = gcn_operand_part (<VnDI>mode, operands[0], 1);
2918 operands[5] = gcn_operand_part (<VnDI>mode, operands[1], 0);
2919 operands[6] = gcn_operand_part (<VnDI>mode, operands[1], 1);
2921 [(set_attr "type" "mult")])
2924 ;; {{{ FP binops - special cases
2926 ; GCN does not directly provide a DFmode subtract instruction, so we do it by
2927 ; adding the negated second operand to the first.
;; V_DF subtract, emitted as v_add_f64 with operand 2 negated.  The two
;; alternatives differ in which source may be a non-"v" operand, and the
;; template orders the sources to match.
2929 (define_insn "sub<mode>3<exec>"
2930 [(set (match_operand:V_DF 0 "register_operand" "= v, v")
2932 (match_operand:V_DF 1 "gcn_alu_operand" "vSvB, v")
2933 (match_operand:V_DF 2 "gcn_alu_operand" " v,vSvB")))]
2936 v_add_f64\t%0, %1, -%2
2937 v_add_f64\t%0, -%2, %1"
2938 [(set_attr "type" "vop3a")
2939 (set_attr "length" "8,8")])
;; Scalar DF subtract, emitted as v_add_f64 with operand 2 negated.  The two
;; alternatives differ in which source may be a non-"v" operand, and the
;; template orders the sources to match.
2941 (define_insn "subdf3"
2942 [(set (match_operand:DF 0 "register_operand" "= v, v")
2944 (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
2945 (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
2948 v_add_f64\t%0, %1, -%2
2949 v_add_f64\t%0, -%2, %1"
2950 [(set_attr "type" "vop3a")
2951 (set_attr "length" "8,8")])
2954 ;; {{{ FP binops - generic
;; FP binary operation groups: commutative codes, the non-commutative code
;; (minus), and the union of both.
2956 (define_code_iterator comm_fp [plus mult smin smax])
2957 (define_code_iterator nocomm_fp [minus])
2958 (define_code_iterator all_fp [plus mult minus smin smax])
;; FP vector binary operation with operand 1 marked commutative ("%").
;; Emits v_<mnemonic>0 with the sources as %2, %1.
2960 (define_insn "<expander><mode>3<exec>"
2961 [(set (match_operand:V_FP 0 "register_operand" "= v")
2963 (match_operand:V_FP 1 "gcn_alu_operand" "% v")
2964 (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
2966 "v_<mnemonic>0\t%0, %2, %1"
2967 [(set_attr "type" "vop2")
2968 (set_attr "length" "8")])
;; Scalar FP binary operation with operand 1 marked commutative ("%").  The
;; first alternative targets a "v" register; the second has an "RL"
;; destination with operand 1 tied to it by the "0" constraint.
2970 (define_insn "<expander><mode>3"
2971 [(set (match_operand:FP 0 "gcn_valu_dst_operand" "= v, RL")
2973 (match_operand:FP 1 "gcn_valu_src0_operand" "% v, 0")
2974 (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
2977 v_<mnemonic>0\t%0, %2, %1
2978 v_<mnemonic>0\t%0, %1%O0"
2979 [(set_attr "type" "vop2,ds")
2980 (set_attr "length" "8")])
;; Non-commutative FP vector binary operation (nocomm_fp) on single-register
;; modes.  Either source may be the non-"v" operand: the first alternative
;; uses v_<mnemonic>0 with %1, %2, the second uses v_<revmnemonic>0 with the
;; sources swapped.
2982 (define_insn "<expander><mode>3<exec>"
2983 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v, v")
2984 (nocomm_fp:V_FP_1REG
2985 (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2986 (match_operand:V_FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
2989 v_<mnemonic>0\t%0, %1, %2
2990 v_<revmnemonic>0\t%0, %2, %1"
2991 [(set_attr "type" "vop2")
2992 (set_attr "length" "8,8")])
;; Scalar FP binary operation on single-register modes in which either
;; source may be the non-"v" operand: the first alternative uses
;; v_<mnemonic>0 with %1, %2, the second uses v_<revmnemonic>0 with the
;; sources swapped.
2994 (define_insn "<expander><mode>3"
2995 [(set (match_operand:FP_1REG 0 "register_operand" "= v, v")
2997 (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB, v")
2998 (match_operand:FP_1REG 2 "gcn_alu_operand" " v,vSvB")))]
3001 v_<mnemonic>0\t%0, %1, %2
3002 v_<revmnemonic>0\t%0, %2, %1"
3003 [(set_attr "type" "vop2")
3004 (set_attr "length" "8,8")])
;; smin/smax codes used by the <fexpander> FP min/max expanders.
3006 (define_code_iterator fminmaxop [smin smax])
;; Scalar FP expander named through <fexpander>, taking a VALU destination
;; and two VALU source operands.
3007 (define_expand "<fexpander><mode>3"
3008 [(set (match_operand:FP 0 "gcn_valu_dst_operand")
3010 (match_operand:FP 1 "gcn_valu_src0_operand")
3011 (match_operand:FP 2 "gcn_valu_src1_operand")))]
;; Vector FP expander named through <fexpander>, taking a VALU destination
;; and two VALU source operands.
3015 (define_expand "<fexpander><mode>3<exec>"
3016 [(set (match_operand:V_FP 0 "gcn_valu_dst_operand")
3018 (match_operand:V_FP 1 "gcn_valu_src0_operand")
3019 (match_operand:V_FP 2 "gcn_valu_src1_operand")))]
;; Scalar FP absolute value, emitted as an add of 0 and |operand 1| using
;; the source modifier syntax (VOP3A encoding).
3026 (define_insn "abs<mode>2"
3027 [(set (match_operand:FP 0 "register_operand" "=v")
3028 (abs:FP (match_operand:FP 1 "register_operand" " v")))]
3030 "v_add%i0\t%0, 0, |%1|"
3031 [(set_attr "type" "vop3a")
3032 (set_attr "length" "8")])
;; Vector FP absolute value, emitted as an add of 0 and |operand 1| using
;; the source modifier syntax (VOP3A encoding).
3034 (define_insn "abs<mode>2<exec>"
3035 [(set (match_operand:V_FP 0 "register_operand" "=v")
3037 (match_operand:V_FP 1 "register_operand" " v")))]
3039 "v_add%i0\t%0, 0, |%1|"
3040 [(set_attr "type" "vop3a")
3041 (set_attr "length" "8")])
;; Vector FP negate, emitted as an add of 0 and -operand 1 using the source
;; modifier syntax (VOP3A encoding).
3043 (define_insn "neg<mode>2<exec>"
3044 [(set (match_operand:V_FP 0 "register_operand" "=v")
3046 (match_operand:V_FP 1 "register_operand" " v")))]
3048 "v_add%i0\t%0, 0, -%1"
3049 [(set_attr "type" "vop3a")
3050 (set_attr "length" "8")])
;; Vector FP square root (VOP1).  Only enabled when
;; flag_unsafe_math_optimizations is set.
3052 (define_insn "sqrt<mode>2<exec>"
3053 [(set (match_operand:V_FP 0 "register_operand" "= v")
3055 (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
3056 "flag_unsafe_math_optimizations"
3058 [(set_attr "type" "vop1")
3059 (set_attr "length" "8")])
;; Scalar FP square root (VOP1).  Only enabled when
;; flag_unsafe_math_optimizations is set.
3061 (define_insn "sqrt<mode>2"
3062 [(set (match_operand:FP 0 "register_operand" "= v")
3064 (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
3065 "flag_unsafe_math_optimizations"
3067 [(set_attr "type" "vop1")
3068 (set_attr "length" "8")])
3070 ; These FP unops have f64, f32 and f16 versions.
3071 (define_int_iterator MATH_UNOP_1OR2REG
3072 [UNSPEC_FLOOR UNSPEC_CEIL])
3074 ; These FP unops only have f16/f32 versions.
3075 (define_int_iterator MATH_UNOP_1REG
3076 [UNSPEC_EXP2 UNSPEC_LOG2])
; Sine and cosine unops, kept in their own iterator.
3078 (define_int_iterator MATH_UNOP_TRIG
3079 [UNSPEC_SIN UNSPEC_COS])
; Maps each math unspec to the name fragment substituted for <math_unop>
; in the "<math_unop><mode>2" pattern names.
3081 (define_int_attr math_unop
3082 [(UNSPEC_FLOOR "floor")
3083 (UNSPEC_CEIL "ceil")
3084 (UNSPEC_EXP2 "exp2")
3085 (UNSPEC_LOG2 "log2")
3087 (UNSPEC_COS "cos")])
; Maps each math unspec to the mnemonic fragment substituted for
; <math_unop_insn> in the "v_<math_unop_insn>%i0" output templates.
3089 (define_int_attr math_unop_insn
3090 [(UNSPEC_FLOOR "floor")
3091 (UNSPEC_CEIL "ceil")
3095 (UNSPEC_COS "cos")])
;; Scalar floor/ceil (the MATH_UNOP_1OR2REG unspecs) over all FP modes,
;; emitted as a single VOP1 instruction.
3097 (define_insn "<math_unop><mode>2"
3098 [(set (match_operand:FP 0 "register_operand" "= v")
3100 [(match_operand:FP 1 "gcn_alu_operand" "vSvB")]
3101 MATH_UNOP_1OR2REG))]
3103 "v_<math_unop_insn>%i0\t%0, %1"
3104 [(set_attr "type" "vop1")
3105 (set_attr "length" "8")])
;; Vector floor/ceil (the MATH_UNOP_1OR2REG unspecs) over the V_FP modes.
3107 (define_insn "<math_unop><mode>2<exec>"
3108 [(set (match_operand:V_FP 0 "register_operand" "= v")
3110 [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")]
3111 MATH_UNOP_1OR2REG))]
3113 "v_<math_unop_insn>%i0\t%0, %1"
3114 [(set_attr "type" "vop1")
3115 (set_attr "length" "8")])
;; Scalar math unop restricted to the FP_1REG modes and enabled only under
;; flag_unsafe_math_optimizations.
;; NOTE(review): the int-iterator line is not visible in this excerpt;
;; presumably MATH_UNOP_1REG (exp2/log2) -- confirm.
3117 (define_insn "<math_unop><mode>2"
3118 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
3120 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
3122 "flag_unsafe_math_optimizations"
3123 "v_<math_unop_insn>%i0\t%0, %1"
3124 [(set_attr "type" "vop1")
3125 (set_attr "length" "8")])
;; Vector math unop restricted to the V_FP_1REG modes and enabled only under
;; flag_unsafe_math_optimizations.
;; NOTE(review): the int-iterator line is not visible in this excerpt;
;; presumably MATH_UNOP_1REG (exp2/log2) -- confirm.
3127 (define_insn "<math_unop><mode>2<exec>"
3128 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
3130 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
3132 "flag_unsafe_math_optimizations"
3133 "v_<math_unop_insn>%i0\t%0, %1"
3134 [(set_attr "type" "vop1")
3135 (set_attr "length" "8")])
;; Unnamed ("*"-prefixed) scalar insn over the FP_1REG modes, enabled only
;; under flag_unsafe_math_optimizations.
;; NOTE(review): the int-iterator line is not visible in this excerpt;
;; presumably MATH_UNOP_TRIG, matched after the scaling done by the
;; "<math_unop><mode>2" expander -- confirm.
3137 (define_insn "*<math_unop><mode>2_insn"
3138 [(set (match_operand:FP_1REG 0 "register_operand" "= v")
3140 [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")]
3142 "flag_unsafe_math_optimizations"
3143 "v_<math_unop_insn>%i0\t%0, %1"
3144 [(set_attr "type" "vop1")
3145 (set_attr "length" "8")])
;; Unnamed ("*"-prefixed) vector insn over the V_FP_1REG modes, enabled only
;; under flag_unsafe_math_optimizations.
;; NOTE(review): the int-iterator line is not visible in this excerpt;
;; presumably MATH_UNOP_TRIG -- confirm.
3147 (define_insn "*<math_unop><mode>2<exec>_insn"
3148 [(set (match_operand:V_FP_1REG 0 "register_operand" "= v")
3150 [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")]
3152 "flag_unsafe_math_optimizations"
3153 "v_<math_unop_insn>%i0\t%0, %1"
3154 [(set_attr "type" "vop1")
3155 (set_attr "length" "8")])
3157 ; Trigonometric functions need their input scaled by 1/(2*PI) first.
; This scalar expander (FP_1REG modes) is only enabled under
; flag_unsafe_math_optimizations.  Its preparation code creates operand 2 as
; a fresh temporary register and operand 3 as a constant built from
; gcn_dconst1over2pi ().
3159 (define_expand "<math_unop><mode>2"
3163 (match_operand:FP_1REG 1 "gcn_alu_operand")))
3164 (set (match_operand:FP_1REG 0 "register_operand")
3168 "flag_unsafe_math_optimizations"
3170 operands[2] = gen_reg_rtx (<MODE>mode);
3171 operands[3] = const_double_from_real_value (gcn_dconst1over2pi (),
; Vector form of the scaling expander (V_FP_1REG modes), likewise gated on
; flag_unsafe_math_optimizations.  Operand 2 is a fresh temporary register;
; the gcn_dconst1over2pi () value is converted to <SCALAR_MODE> and passed to
; gcn_vec_constant to build the vector constant.
3175 (define_expand "<math_unop><mode>2<exec>"
3179 (match_operand:V_FP_1REG 1 "gcn_alu_operand")))
3180 (set (match_operand:V_FP_1REG 0 "register_operand")
3184 "flag_unsafe_math_optimizations"
3186 operands[2] = gen_reg_rtx (<MODE>mode);
3188 gcn_vec_constant (<MODE>mode,
3189 const_double_from_real_value (gcn_dconst1over2pi (),
3190 <SCALAR_MODE>mode));
3193 ; Implement ldexp pattern
; Operand 1 is the FP input (scalar or vector, SV_FP); operand 2 has the
; <VnSI> integer mode.  Emitted as one v_ldexp instruction (type vop3a).
3195 (define_insn "ldexp<mode>3<exec>"
3196 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3198 [(match_operand:SV_FP 1 "gcn_alu_operand" " vA")
3199 (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
3202 "v_ldexp%i0\t%0, %1, %2"
3203 [(set_attr "type" "vop3a")
3204 (set_attr "length" "8")])
3206 ; Implement frexp patterns
; Scalar exponent extraction: takes an FP input and writes an SI result
; using v_frexp_exp_i32 (the type suffix comes from the input, %i1).
3208 (define_insn "frexp<mode>_exp2"
3209 [(set (match_operand:SI 0 "register_operand" "=v")
3211 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
3214 "v_frexp_exp_i32%i1\t%0, %1"
3215 [(set_attr "type" "vop1")
3216 (set_attr "length" "8")])
; Scalar mantissa extraction (UNSPEC_FREXP_MANT): FP input, FP result in the
; same mode, emitted as v_frexp_mant.
3218 (define_insn "frexp<mode>_mant2"
3219 [(set (match_operand:FP 0 "register_operand" "=v")
3221 [(match_operand:FP 1 "gcn_alu_operand" "vB")]
3222 UNSPEC_FREXP_MANT))]
3224 "v_frexp_mant%i1\t%0, %1"
3225 [(set_attr "type" "vop1")
3226 (set_attr "length" "8")])
; Vector exponent extraction: V_FP input, <VnSI> integer vector result,
; emitted as v_frexp_exp_i32.
3228 (define_insn "frexp<mode>_exp2<exec>"
3229 [(set (match_operand:<VnSI> 0 "register_operand" "=v")
3231 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
3234 "v_frexp_exp_i32%i1\t%0, %1"
3235 [(set_attr "type" "vop1")
3236 (set_attr "length" "8")])
; Vector mantissa extraction (UNSPEC_FREXP_MANT) over the V_FP modes,
; emitted as v_frexp_mant.
3238 (define_insn "frexp<mode>_mant2<exec>"
3239 [(set (match_operand:V_FP 0 "register_operand" "=v")
3241 [(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
3242 UNSPEC_FREXP_MANT))]
3244 "v_frexp_mant%i1\t%0, %1"
3245 [(set_attr "type" "vop1")
3246 (set_attr "length" "8")])
3249 ;; {{{ FP fused multiply and add
;; Vector fused multiply-add over the V_FP modes: a single v_fma taking
;; operands 1, 2 and 3 unmodified.  Operand 1 carries the "%" commutative
;; marker; the two alternatives differ in which of operands 2 and 3 may use
;; the wider "vSvA" constraint.
3251 (define_insn "fma<mode>4<exec>"
3252 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
3254 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
3255 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
3256 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA")))]
3258 "v_fma%i0\t%0, %1, %2, %3"
3259 [(set_attr "type" "vop3a")
3260 (set_attr "length" "8")])
;; Vector fused multiply-add with operand 2 negated in the template (-%2).
;; Three alternatives; each allows the wider "vSvA" constraint on a
;; different one of the three inputs.
3262 (define_insn "fma<mode>4_negop2<exec>"
3263 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
3265 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3267 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3268 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3270 "v_fma%i0\t%0, %1, -%2, %3"
3271 [(set_attr "type" "vop3a")
3272 (set_attr "length" "8")])
;; Scalar fused multiply-add over the FP modes: a single v_fma taking
;; operands 1, 2 and 3 unmodified.  Operand 1 carries the "%" commutative
;; marker.
3274 (define_insn "fma<mode>4"
3275 [(set (match_operand:FP 0 "register_operand" "= v, v")
3277 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
3278 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
3279 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA")))]
3281 "v_fma%i0\t%0, %1, %2, %3"
3282 [(set_attr "type" "vop3a")
3283 (set_attr "length" "8")])
;; Scalar fused multiply-add with operand 2 negated in the template (-%2).
3285 (define_insn "fma<mode>4_negop2"
3286 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
3288 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3290 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3291 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA")))]
3293 "v_fma%i0\t%0, %1, -%2, %3"
3294 [(set_attr "type" "vop3a")
3295 (set_attr "length" "8")])
;; Vector fused multiply-subtract over the V_FP modes: emitted as v_fma with
;; the addend, operand 3, negated in the template (-%3).
3297 (define_insn "fms<mode>4<exec>"
3298 [(set (match_operand:V_FP 0 "register_operand" "= v, v")
3300 (match_operand:V_FP 1 "gcn_alu_operand" "% vA, vA")
3301 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA")
3303 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA"))))]
3305 "v_fma%i0\t%0, %1, %2, -%3"
3306 [(set_attr "type" "vop3a")
3307 (set_attr "length" "8")])
;; Vector fused multiply-subtract with operand 2 negated as well: emitted as
;; v_fma with both -%2 and -%3.
3309 (define_insn "fms<mode>4_negop2<exec>"
3310 [(set (match_operand:V_FP 0 "register_operand" "= v, v, v")
3312 (match_operand:V_FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3314 (match_operand:V_FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3316 (match_operand:V_FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
3318 "v_fma%i0\t%0, %1, -%2, -%3"
3319 [(set_attr "type" "vop3a")
3320 (set_attr "length" "8")])
;; Scalar fused multiply-subtract over the FP modes: emitted as v_fma with
;; operand 3 negated in the template (-%3).
3322 (define_insn "fms<mode>4"
3323 [(set (match_operand:FP 0 "register_operand" "= v, v")
3325 (match_operand:FP 1 "gcn_alu_operand" "% vA, vA")
3326 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA")
3328 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA"))))]
3330 "v_fma%i0\t%0, %1, %2, -%3"
3331 [(set_attr "type" "vop3a")
3332 (set_attr "length" "8")])
;; Scalar fused multiply-subtract with operand 2 negated as well: emitted as
;; v_fma with both -%2 and -%3.
3334 (define_insn "fms<mode>4_negop2"
3335 [(set (match_operand:FP 0 "register_operand" "= v, v, v")
3337 (match_operand:FP 1 "gcn_alu_operand" " vA, vA,vSvA")
3339 (match_operand:FP 2 "gcn_alu_operand" " vA,vSvA, vA"))
3341 (match_operand:FP 3 "gcn_alu_operand" "vSvA, vA, vA"))))]
3343 "v_fma%i0\t%0, %1, -%2, -%3"
3344 [(set_attr "type" "vop3a")
3345 (set_attr "length" "8")])
;; Reciprocal pattern over the scalar and vector FP modes (SV_FP), type vop1.
;; The "div<mode>3" expander uses it for the initial reciprocal estimate.
;; NOTE(review): the unspec name, condition and output template are not
;; visible in this excerpt.
3350 (define_insn "recip<mode>2<exec>"
3351 [(set (match_operand:SV_FP 0 "register_operand" "= v")
3353 [(match_operand:SV_FP 1 "gcn_alu_operand" "vSvB")]
3357 [(set_attr "type" "vop1")
3358 (set_attr "length" "8")])
3360 ;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the
3361 ;; one that matches op3 adjusted for best results in reciprocal division.
3362 ;; It also emits a VCC mask that is intended for input to v_div_fmas.
3363 ;; The caller is expected to call this twice, once for each input. The output
3364 ;; VCC is the same in both cases, so the caller may discard one.
;; In the assembly template the operands appear in the order %0 (scaled
;; result), %4 (DI mask output), %3, %1, %2.
3365 (define_insn "div_scale<mode><exec_vcc>"
3366 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3368 [(match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3369 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v")
3370 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")]
3372 (set (match_operand:DI 4 "register_operand" "=SvcV")
3374 [(match_dup 1) (match_dup 2) (match_dup 3)]
3377 "v_div_scale%i0\t%0, %4, %3, %1, %2"
3378 [(set_attr "type" "vop3b")
3379 (set_attr "length" "8")])
3381 ;; v_div_fmas is "FMA and Scale" that uses the VCC output from v_div_scale
3382 ;; to conditionally scale the output of the whole division operation.
3383 ;; This is necessary to counter the adjustments made by v_div_scale and
3384 ;; replaces the last FMA instruction of the Newton-Raphson algorithm.
;; Operand 4 is the DI mask (constraint "cV"); in the template it is printed
;; only after the ';' that follows the three explicit source operands
;; (presumably as an assembly comment -- confirm).  The "vccwait" attribute
;; is set to 5 for this insn.
3385 (define_insn "div_fmas<mode><exec>"
3386 [(set (match_operand:SV_SFDF 0 "register_operand" "=v")
3390 (match_operand:SV_SFDF 1 "gcn_alu_operand" "v")
3391 (match_operand:SV_SFDF 2 "gcn_alu_operand" "v"))
3392 (match_operand:SV_SFDF 3 "gcn_alu_operand" "v"))
3393 (match_operand:DI 4 "register_operand" "cV")]
3396 "v_div_fmas%i0\t%0, %1, %2, %3; %4"
3397 [(set_attr "type" "vop3a")
3398 (set_attr "length" "8")
3399 (set_attr "vccwait" "5")])
3401 ;; v_div_fixup takes the inputs and outputs of a division operation already
3402 ;; completed and cleans up the floating-point sign bit, infinity, underflow,
3403 ;; overflow, and NaN status. It will also emit any FP exceptions.
3404 ;; op1: quotient, op2: denominator, op3: numerator
;; Defined over the scalar and vector FP modes (SV_FP); operand 1 must be a
;; register, and all operands use the "v" constraint.
3405 (define_insn "div_fixup<mode><exec>"
3406 [(set (match_operand:SV_FP 0 "register_operand" "=v")
3408 [(match_operand:SV_FP 1 "register_operand" "v")
3409 (match_operand:SV_FP 2 "gcn_alu_operand" "v")
3410 (match_operand:SV_FP 3 "gcn_alu_operand" "v")]
3413 "v_div_fixup%i0\t%0, %1, %2, %3"
3414 [(set_attr "type" "vop3a")
3415 (set_attr "length" "8")])
;; Expand FP division (operand 0 = operand 1 / operand 2) for the SV_SFDF
;; modes as an instruction sequence:
;;   1. div_scale both inputs, keeping the VCC mask from the second call;
;;   2. take the hardware reciprocal of the scaled denominator and refine it
;;      with two FMAs;
;;   3. multiply the scaled numerator by the reciprocal and refine the
;;      quotient estimate with further FMAs, finishing with div_fmas;
;;   4. apply div_fixup to the result, writing operand 0.
3417 (define_expand "div<mode>3"
3418 [(match_operand:SV_SFDF 0 "register_operand")
3419 (match_operand:SV_SFDF 1 "gcn_alu_operand")
3420 (match_operand:SV_SFDF 2 "gcn_alu_operand")]
3423 rtx numerator = operands[1];
3424 rtx denominator = operands[2];
3426 /* Scale the inputs if they are close to the FP limits.
3427 This will be reversed later. */
3428 rtx vcc = gen_reg_rtx (DImode);
3429 rtx discardedvcc = gen_reg_rtx (DImode);
3430 rtx scaled_numerator = gen_reg_rtx (<MODE>mode);
3431 rtx scaled_denominator = gen_reg_rtx (<MODE>mode);
3432 emit_insn (gen_div_scale<mode> (scaled_denominator,
3433 denominator, numerator,
3434 denominator, discardedvcc));
3435 emit_insn (gen_div_scale<mode> (scaled_numerator,
3436 denominator, numerator,
3439 /* Find the reciprocal of the denominator, and use Newton-Raphson to
3440 improve the accuracy over the basic hardware instruction. */
3441 rtx one = gcn_vec_constant (<MODE>mode,
3442 const_double_from_real_value (dconst1, <SCALAR_MODE>mode));
3443 rtx initrcp = gen_reg_rtx (<MODE>mode);
3444 rtx fma1 = gen_reg_rtx (<MODE>mode);
3445 rtx rcp = gen_reg_rtx (<MODE>mode);
3446 emit_insn (gen_recip<mode>2 (initrcp, scaled_denominator));
3447 emit_insn (gen_fma<mode>4_negop2 (fma1, initrcp, scaled_denominator, one));
3448 emit_insn (gen_fma<mode>4 (rcp, fma1, initrcp, initrcp));
3450 /* Do the division "a/b" via "a*1/b" and use Newton-Raphson to improve
3451 the accuracy. The "div_fmas" instruction reverses any scaling
3452 performed by "div_scale", above. */
3453 rtx div_est = gen_reg_rtx (<MODE>mode);
3454 rtx fma2 = gen_reg_rtx (<MODE>mode);
3455 rtx fma3 = gen_reg_rtx (<MODE>mode);
3456 rtx fma4 = gen_reg_rtx (<MODE>mode);
3457 rtx fmas = gen_reg_rtx (<MODE>mode);
3458 emit_insn (gen_mul<mode>3 (div_est, scaled_numerator, rcp));
3459 emit_insn (gen_fma<mode>4_negop2 (fma2, div_est, scaled_denominator,
3461 emit_insn (gen_fma<mode>4 (fma3, fma2, rcp, div_est));
3462 emit_insn (gen_fma<mode>4_negop2 (fma4, fma3, scaled_denominator,
3464 emit_insn (gen_div_fmas<mode> (fmas, fma4, rcp, fma3, vcc));
3466 /* Finally, use "div_fixup" to get the details right and find errors. */
3467 emit_insn (gen_div_fixup<mode> (operands[0], fmas, denominator,
3473 ;; {{{ Int/FP conversions
; Scalar source and destination modes for the int/FP conversion patterns.
3475 (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
3476 (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
; Vector modes taking part in conversions: HI, SI, HF, SF and DF elements
; at every vector length from 2 to 64.
3478 (define_mode_iterator VCVT_MODE
3479 [V2HI V2SI V2HF V2SF V2DF
3480 V4HI V4SI V4HF V4SF V4DF
3481 V8HI V8SI V8HF V8SF V8DF
3482 V16HI V16SI V16HF V16SF V16DF
3483 V32HI V32SI V32HF V32SF V32DF
3484 V64HI V64SI V64HF V64SF V64DF])
; NOTE(review): the mode lists of the next two iterators are not visible in
; this excerpt; from their use below, VCVT_FMODE is a destination or source
; of vector conversions and VCVT_IMODE a destination.
3485 (define_mode_iterator VCVT_FMODE
3492 (define_mode_iterator VCVT_IMODE
; The RTL conversion codes handled by the v_cvt patterns.
3500 (define_code_iterator cvt_op [fix unsigned_fix
3501 float unsigned_float
3502 float_extend float_truncate])
; Pattern-name prefix for each conversion code.
3503 (define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
3504 (float "float") (unsigned_float "floatuns")
3505 (float_extend "extend") (float_truncate "trunc")])
; Operand-type directives appended to "v_cvt" for each conversion code;
; the unsigned variants use %u on the integer side.
3506 (define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
3507 (float "%i0%i1") (unsigned_float "%i0%u1")
3508 (float_extend "%i0%i1")
3509 (float_truncate "%i0%i1")])
;; Scalar conversion between every CVT_FROM_MODE/CVT_TO_MODE pair; the insn
;; condition calls gcn_valid_cvt_p with the source and destination modes.
;; Emitted as one v_cvt instruction (type vop1).
3511 (define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
3512 [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
3514 (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
3515 "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
3517 "v_cvt<cvt_operands>\t%0, %1"
3518 [(set_attr "type" "vop1")
3519 (set_attr "length" "8")])
;; Vector conversion from any VCVT_MODE into a VCVT_FMODE.  The condition
;; requires MODE_VF to be equal for source and destination and also calls
;; gcn_valid_cvt_p.
3521 (define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
3522 [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v")
3524 (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
3525 "MODE_VF (<VCVT_MODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3526 && gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
3528 "v_cvt<cvt_operands>\t%0, %1"
3529 [(set_attr "type" "vop1")
3530 (set_attr "length" "8")])
;; Vector conversion from a VCVT_FMODE into a VCVT_IMODE.  The condition
;; requires MODE_VF to be equal for source and destination and also calls
;; gcn_valid_cvt_p.
3532 (define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
3533 [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v")
3535 (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
3536 "MODE_VF (<VCVT_IMODE:MODE>mode) == MODE_VF (<VCVT_FMODE:MODE>mode)
3537 && gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
3539 "v_cvt<cvt_operands>\t%0, %1"
3540 [(set_attr "type" "vop1")
3541 (set_attr "length" "8")])
3544 ;; {{{ Int/int conversions
; all_convert covers every integer width change; zero_convert is the subset
; without sign extension.  convop gives the pattern-name fragment for each.
3546 (define_code_iterator all_convert [truncate zero_extend sign_extend])
3547 (define_code_iterator zero_convert [truncate zero_extend])
3548 (define_code_attr convop [
3549 (sign_extend "extend")
3550 (zero_extend "zero_extend")
3551 (truncate "trunc")])
;; Named expander for truncate/zero_extend/sign_extend between two
;; single-register integer vector modes (V_INT_1REG_ALT -> V_INT_1REG).
3553 (define_expand "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
3554 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3555 (all_convert:V_INT_1REG
3556 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
;; Truncate or zero-extend using one SDWA move: dst_sel and src0_sel are
;; taken from the <sdwa> attribute of the destination and source modes, and
;; dst_unused is UNUSED_PAD.
3559 (define_insn "*<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>_sdwa<exec>"
3560 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3561 (zero_convert:V_INT_1REG
3562 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3564 "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
3565 [(set_attr "type" "vop_sdwa")
3566 (set_attr "length" "8")])
;; Sign-extend using one SDWA move: the source is wrapped in sext() and
;; selected with src0_sel from the source mode's <sdwa> attribute.
3568 (define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>_sdwa<exec>"
3569 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3570 (sign_extend:V_INT_1REG
3571 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3573 "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
3574 [(set_attr "type" "vop_sdwa")
3575 (set_attr "length" "8")])
;; Shift-based implementation of the integer width conversions.  A shift
;; width is chosen from whether either scalar mode is QImode or HImode and
;; stored in operand 2.  The output is either a plain v_mov_b32, or a left
;; shift followed by a right shift: arithmetic for "extend" and "trunc",
;; logical otherwise.
3577 (define_insn "*<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>_shift<exec>"
3578 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3579 (all_convert:V_INT_1REG
3580 (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
3583 enum {extend, zero_extend, trunc};
3584 rtx shiftwidth = (<V_INT_1REG_ALT:SCALAR_MODE>mode == QImode
3585 || <V_INT_1REG:SCALAR_MODE>mode == QImode
3587 : <V_INT_1REG_ALT:SCALAR_MODE>mode == HImode
3588 || <V_INT_1REG:SCALAR_MODE>mode == HImode
3591 operands[2] = shiftwidth;
3594 return "v_mov_b32 %0, %1";
3595 else if (<convop> == extend || <convop> == trunc)
3596 return "v_lshlrev_b32\t%0, %2, %1\;v_ashrrev_i32\t%0, %2, %0";
3598 return "v_lshlrev_b32\t%0, %2, %1\;v_lshrrev_b32\t%0, %2, %0";
3600 [(set_attr "type" "mult")
3601 (set_attr "length" "8")])
3603 ;; GCC can already do these for scalar types, but not for vector types.
3604 ;; Unfortunately you can't just do SUBREG on a vector to select the low part,
3605 ;; so there must be a few tricks here.
;; Truncate a <VnDI> vector to a single-register integer vector.  The split
;; takes part 0 of the input via gcn_operand_part and either truncates it
;; further (when the destination mode is not <VnSI>) or simply moves it.
3607 (define_insn_and_split "trunc<vndi><mode>2"
3608 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3609 (truncate:V_INT_1REG
3610 (match_operand:<VnDI> 1 "gcn_alu_operand" " v")))]
3616 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
3617 rtx out = operands[0];
3619 if (<MODE>mode != <VnSI>mode)
3620 emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
3622 emit_move_insn (out, inlo);
3624 [(set_attr "type" "vop2")
3625 (set_attr "length" "4")])
;; Execution-masked form of the <VnDI> truncation: a vec_merge of the
;; truncated value with merge operand 2 under EXEC operand 3.  The split
;; takes part 0 of the input and forwards the merge value and mask to
;; either the narrower truncation's _exec pattern or a masked move.
3627 (define_insn_and_split "trunc<vndi><mode>2_exec"
3628 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
3629 (vec_merge:V_INT_1REG
3630 (truncate:V_INT_1REG
3631 (match_operand:<VnDI> 1 "gcn_alu_operand" " v"))
3632 (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
3633 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3639 rtx out = operands[0];
3640 rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
3641 rtx merge = operands[2];
3642 rtx exec = operands[3];
3644 if (<MODE>mode != <VnSI>mode)
3645 emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
3647 emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));
3649 [(set_attr "type" "vop2")
3650 (set_attr "length" "4")])
;; Extend a single-register integer vector to <VnDI>.  The split writes
;; part 0 of the destination by converting the input to <VnSI> (or moving
;; it when it already is <VnSI>), then fills part 1 either with an
;; arithmetic shift right by 31 of part 0 or with a broadcast zero.
;; NOTE(review): the condition choosing between the two part-1 fills is not
;; visible in this excerpt; presumably sign vs. zero extension.
3652 (define_insn_and_split "<convop><mode><vndi>2"
3653 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3655 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
3661 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3662 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3663 rtx in = operands[1];
3665 if (<MODE>mode != <VnSI>mode)
3666 emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
3668 emit_move_insn (outlo, in);
3670 emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
3672 emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));
3674 [(set_attr "type" "mult")
3675 (set_attr "length" "12")])
;; Execution-masked extension to <VnDI>.  The <VnDI> merge operand 2 is
;; split into its two parts, which serve as merge values for the part-0
;; conversion (or masked move) and for the part-1 fill (arithmetic shift
;; right by 31, or broadcast zero) under EXEC operand 3.
3677 (define_insn_and_split "<convop><mode><vndi>2_exec"
3678 [(set (match_operand:<VnDI> 0 "register_operand" "=v")
3681 (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v"))
3682 (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
3683 (match_operand:DI 3 "gcn_exec_operand" " e")))]
3689 rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
3690 rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
3691 rtx in = operands[1];
3692 rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
3693 rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
3694 rtx exec = operands[3];
3696 if (<MODE>mode != <VnSI>mode)
3697 emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
3699 emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
3701 emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
3704 emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
3707 [(set_attr "type" "mult")
3708 (set_attr "length" "12")])
3711 ;; {{{ Vector comparison/merge
;; Vector comparison producing a DI lane mask, for the V_noQI modes.
;; Eight alternatives select the destination class ("cV", "e" or "Sg") and
;; whether operand 2 is a register or a "B" constant.  The scratch operand 4
;; is requested as "cV" only in alternatives 2 and 3 (the v_cmpx forms).
;; The "rdna" attribute marks alternatives 2-3 "no" and 6-7 "yes".
3713 (define_insn "vec_cmp<mode>di"
3714 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3715 (match_operator:DI 1 "gcn_fp_compare_operator"
3716 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3717 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")]))
3718 (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X, X, X"))]
3721 v_cmp%E1\tvcc, %2, %3
3722 v_cmp%E1\tvcc, %2, %3
3723 v_cmpx%E1\tvcc, %2, %3
3724 v_cmpx%E1\tvcc, %2, %3
3725 v_cmp%E1\t%0, %2, %3
3726 v_cmp%E1\t%0, %2, %3
3729 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3730 (set_attr "length" "4,8,4,8,8,8,4,8")
3731 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
;; Unsigned vector comparison for the V_INT_noQI modes; forwards its
;; operands unchanged to the "vec_cmp<mode>di" pattern.
3733 (define_expand "vec_cmpu<mode>di"
3734 [(match_operand:DI 0 "register_operand")
3735 (match_operator 1 "gcn_compare_operator"
3736 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3737 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
3740 /* Unsigned comparisons use the same patterns as signed comparisons,
3741 except that they use unsigned operators (e.g. LTU vs LT).
3742 The '%E1' directive then does the Right Thing. */
3743 emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
3748 ; There's no instruction for 8-bit vector comparison, so we need to extend.
; Both V_QI inputs are extended into fresh <VnSI> temporaries (hence the
; can_create_pseudo_p condition) and compared with "vec_cmp<vnsi>di".
3749 (define_expand "vec_cmp<u><mode>di"
3750 [(match_operand:DI 0 "register_operand")
3751 (match_operator 1 "gcn_compare_operator"
3752 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3753 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
3754 "can_create_pseudo_p ()"
3756 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3757 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3759 emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
3760 emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
3761 emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
;; Execution-masked vector comparison for the V_noQI modes: same eight
;; alternatives as "vec_cmp<mode>di", with the EXEC register as operand 4
;; and the scratch clobber moved to operand 5.
3765 (define_insn "vec_cmp<mode>di_exec"
3766 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg, e, e")
3768 (match_operator 1 "gcn_fp_compare_operator"
3769 [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA,vSv, B")
3770 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v, v,vA, v, v, v")])
3771 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e, e, e")))
3772 (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X, X, X"))]
3775 v_cmp%E1\tvcc, %2, %3
3776 v_cmp%E1\tvcc, %2, %3
3777 v_cmpx%E1\tvcc, %2, %3
3778 v_cmpx%E1\tvcc, %2, %3
3779 v_cmp%E1\t%0, %2, %3
3780 v_cmp%E1\t%0, %2, %3
3783 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
3784 (set_attr "length" "4,8,4,8,8,8,4,8")
3785 (set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
;; Execution-masked unsigned vector comparison for the V_INT_noQI modes.
;; Operand 0 is the DI result mask, operand 1 the comparison operator
;; applied to operands 2 and 3, and operand 4 the EXEC mask.  The expander
;; forwards to "vec_cmp<mode>di_exec", mirroring how "vec_cmpu<mode>di"
;; forwards to "vec_cmp<mode>di".  It must not emit
;; gen_vec_cmpu<mode>di_exec: that is this expander's own generator, so the
;; call would recurse without ever emitting a comparison.
3787 (define_expand "vec_cmpu<mode>di_exec"
3788 [(match_operand:DI 0 "register_operand")
3789 (match_operator 1 "gcn_compare_operator"
3790 [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
3791 (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
3792 (match_operand:DI 4 "gcn_exec_reg_operand")]
3795 /* Unsigned comparisons use the same patterns as signed comparisons,
3796 except that they use unsigned operators (e.g. LTU vs LT).
3797 The '%E1' directive then does the Right Thing. */
3798 emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
3799 operands[2], operands[3],
;; Execution-masked 8-bit vector comparison: both V_QI inputs are extended
;; into fresh <VnSI> temporaries under EXEC operand 4 and then compared with
;; "vec_cmp<vnsi>di_exec".
;; NOTE(review): the merge argument passed to each extend is the V_QI input
;; itself (operands[2] / operands[3]) although the destination is <VnSI> --
;; confirm this is intended.
3804 (define_expand "vec_cmp<u><mode>di_exec"
3805 [(match_operand:DI 0 "register_operand")
3806 (match_operator 1 "gcn_compare_operator"
3807 [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
3808 (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
3809 (match_operand:DI 4 "gcn_exec_reg_operand")]
3810 "can_create_pseudo_p ()"
3812 rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
3813 rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
3815 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
3816 operands[2], operands[4]));
3817 emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
3818 operands[3], operands[4]));
3819 emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
3820 sitmp2, operands[4]));
;; Vector comparison whose first input is a scalar (operand 2, in
;; <SCALAR_MODE>) wrapped in vec_duplicate, compared against vector
;; operand 3.  Seven alternatives; the scratch operand 4 is "cV" only in
;; alternatives 2 and 3 (the v_cmpx forms).
3824 (define_insn "vec_cmp<mode>di_dup"
3825 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3826 (match_operator:DI 1 "gcn_fp_compare_operator"
3827 [(vec_duplicate:V_noQI
3828 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3829 " Sv, B,Sv,B, A,Sv,B"))
3830 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")]))
3831 (clobber (match_scratch:DI 4 "= X,X,cV,cV, X, X,X"))]
3834 v_cmp%E1\tvcc, %2, %3
3835 v_cmp%E1\tvcc, %2, %3
3836 v_cmpx%E1\tvcc, %2, %3
3837 v_cmpx%E1\tvcc, %2, %3
3838 v_cmp%E1\t%0, %2, %3
3841 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3842 (set_attr "length" "4,8,4,8,8,4,8")
3843 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
;; Execution-masked form of the scalar-broadcast comparison: EXEC is
;; operand 4 and the scratch clobber is operand 5.
3845 (define_insn "vec_cmp<mode>di_dup_exec"
3846 [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg, e,e")
3848 (match_operator 1 "gcn_fp_compare_operator"
3849 [(vec_duplicate:V_noQI
3850 (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
3851 " Sv, B,Sv,B, A,Sv,B"))
3852 (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v, v,v, v, v,v")])
3853 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e, e,e")))
3854 (clobber (match_scratch:DI 5 "= X,X,cV,cV, X, X,X"))]
3857 v_cmp%E1\tvcc, %2, %3
3858 v_cmp%E1\tvcc, %2, %3
3859 v_cmpx%E1\tvcc, %2, %3
3860 v_cmpx%E1\tvcc, %2, %3
3861 v_cmp%E1\t%0, %2, %3
3864 [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
3865 (set_attr "length" "4,8,4,8,8,4,8")
3866 (set_attr "rdna" "*,*,no,no,*,yes,yes")])
;; Select between vector operands 1 and 2 under the DI mask in operand 3,
;; for all vector modes (V_ALL).  The pattern also clobbers a <VnDI> scratch.
3868 (define_expand "vcond_mask_<mode>di"
3870 [(set (match_operand:V_ALL 0 "register_operand" "")
3872 (match_operand:V_ALL 1 "gcn_vop3_operand" "")
3873 (match_operand:V_ALL 2 "gcn_alu_operand" "")
3874 (match_operand:DI 3 "register_operand" "")))
3875 (clobber (scratch:<VnDI>))])]
;; Vector conditional select: compare operands 4 and 5 (V_ALL_ALT) with
;; operator 3 into a temporary DI mask, then use that mask with
;; "vcond_mask_<mode>di" to pick between operands 1 and 2 (V_ALL).
3879 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
3880 [(match_operand:V_ALL 0 "register_operand")
3881 (match_operand:V_ALL 1 "gcn_vop3_operand")
3882 (match_operand:V_ALL 2 "gcn_alu_operand")
3883 (match_operator 3 "gcn_fp_compare_operator"
3884 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3885 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
3888 rtx tmp = gen_reg_rtx (DImode);
3889 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
3890 (tmp, operands[3], operands[4], operands[5]));
3891 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3892 (operands[0], operands[1], operands[2], tmp));
;; Execution-masked vector conditional select.  Only the comparison is
;; masked by EXEC operand 6 (via "vec_cmp<mode>di_exec"); the final select
;; uses the unmasked "vcond_mask_<mode>di".
3896 (define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
3897 [(match_operand:V_ALL 0 "register_operand")
3898 (match_operand:V_ALL 1 "gcn_vop3_operand")
3899 (match_operand:V_ALL 2 "gcn_alu_operand")
3900 (match_operator 3 "gcn_fp_compare_operator"
3901 [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
3902 (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
3903 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3906 rtx tmp = gen_reg_rtx (DImode);
3907 emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
3908 (tmp, operands[3], operands[4], operands[5], operands[6]));
3909 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3910 (operands[0], operands[1], operands[2], tmp));
;; Unsigned vector conditional select: compare the V_INT operands 4 and 5
;; through "vec_cmpu<mode>di" into a temporary DI mask, then select between
;; operands 1 and 2 (V_ALL) with "vcond_mask_<mode>di".
3914 (define_expand "vcondu<V_ALL:mode><V_INT:mode>"
3915 [(match_operand:V_ALL 0 "register_operand")
3916 (match_operand:V_ALL 1 "gcn_vop3_operand")
3917 (match_operand:V_ALL 2 "gcn_alu_operand")
3918 (match_operator 3 "gcn_fp_compare_operator"
3919 [(match_operand:V_INT 4 "gcn_alu_operand")
3920 (match_operand:V_INT 5 "gcn_vop3_operand")])]
3923 rtx tmp = gen_reg_rtx (DImode);
3924 emit_insn (gen_vec_cmpu<V_INT:mode>di
3925 (tmp, operands[3], operands[4], operands[5]));
3926 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3927 (operands[0], operands[1], operands[2], tmp));
;; Execution-masked unsigned vector conditional select.  The comparison goes
;; through "vec_cmpu<mode>di_exec" with EXEC operand 6; the final select
;; uses the unmasked "vcond_mask_<mode>di".
3931 (define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
3932 [(match_operand:V_ALL 0 "register_operand")
3933 (match_operand:V_ALL 1 "gcn_vop3_operand")
3934 (match_operand:V_ALL 2 "gcn_alu_operand")
3935 (match_operator 3 "gcn_fp_compare_operator"
3936 [(match_operand:V_INT 4 "gcn_alu_operand")
3937 (match_operand:V_INT 5 "gcn_vop3_operand")])
3938 (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
3941 rtx tmp = gen_reg_rtx (DImode);
3942 emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
3943 (tmp, operands[3], operands[4], operands[5], operands[6]));
3944 emit_insn (gen_vcond_mask_<V_ALL:mode>di
3945 (operands[0], operands[1], operands[2], tmp));
3950 ;; {{{ Fully masked loop support
;; Build the DI lane mask in operand 0 from SI operands 1 and 2.
;; When either operand 1 or 2 is not a CONST_INT, the mask comes from a
;; vector comparison (vec_cmpv64sidi_dup) involving the V64SI value in
;; VGPR 1, to which operand 1 is first added unless it is constant zero.
;; NOTE(review): the name _0_1_2_3 suggests VGPR 1 holds the lane numbers --
;; confirm.
;; The constant-handling code visible below sets the low
;; (operand 2 - operand 1) bits, or all bits when that difference is >= 64.
;; When operand 3 is below 64, operand 0 is additionally ANDed so that only
;; its low operand-3 bits survive.
3952 (define_expand "while_ultsidi"
3953 [(match_operand:DI 0 "register_operand")
3954 (match_operand:SI 1 "")
3955 (match_operand:SI 2 "")
3956 (match_operand:SI 3 "")]
3959 if (GET_CODE (operands[1]) != CONST_INT
3960 || GET_CODE (operands[2]) != CONST_INT)
3962 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3964 if (GET_CODE (operands[1]) != CONST_INT
3965 || INTVAL (operands[1]) != 0)
3967 tmp = gen_reg_rtx (V64SImode);
3968 emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
3970 emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
3971 gen_rtx_GT (VOIDmode, 0, 0),
3976 HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
3977 HOST_WIDE_INT mask = (diff >= 64 ? -1
3978 : ~((unsigned HOST_WIDE_INT)-1 << diff));
3979 emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
3981 if (INTVAL (operands[3]) < 64)
3982 emit_insn (gen_anddi3 (operands[0], operands[0],
3983 gen_rtx_CONST_INT (VOIDmode,
3984 ~((unsigned HOST_WIDE_INT)-1
3985 << INTVAL (operands[3])))));
;; Masked vector load: operand 0 is the destination register, operand 1 the
;; memory source and operand 2 the mask.  The mask is forced into a DI
;; register and the memory reference is expanded into a vector address.
;; The destination is zeroed first, then filled by a masked gather that
;; uses the zeroed destination as its merge value.
3989 (define_expand "maskload<mode>di"
3990 [(match_operand:V_MOV 0 "register_operand")
3991 (match_operand:V_MOV 1 "memory_operand")
3992 (match_operand 2 "")]
3995 rtx exec = force_reg (DImode, operands[2]);
3996 rtx addr = gcn_expand_scalar_to_vector_address
3997 (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
3998 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
3999 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
4001 /* Masked lanes are required to hold zero. */
4002 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
4004 emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
4005 operands[0], exec));
;; Masked vector store: operand 0 is the memory destination, operand 1 the
;; source register and operand 2 the mask.  The mask is forced into a DI
;; register, the memory reference is expanded into a vector address, and the
;; store is emitted as a masked scatter carrying the address space and
;; volatility of the memory operand.
4009 (define_expand "maskstore<mode>di"
4010 [(match_operand:V_MOV 0 "memory_operand")
4011 (match_operand:V_MOV 1 "register_operand")
4012 (match_operand 2 "")]
4015 rtx exec = force_reg (DImode, operands[2]);
4016 rtx addr = gcn_expand_scalar_to_vector_address
4017 (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
4018 rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
4019 rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
4020 emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
;; Masked gather load.  Operand 0 is the destination, operand 1 a DI
;; register, operand 2 a <VnSI> register, operand 3 an immediate, operand 4
;; an SI value and operand 5 the mask.  Operands 1-4 are passed to
;; gcn_expand_scaled_offsets to form the address; the destination is zeroed
;; and then loaded under the mask.  The one-offset gather is used when the
;; computed address has <VnDI> mode, the two-offset gather (with operand 1
;; passed separately) otherwise.
4024 (define_expand "mask_gather_load<mode><vnsi>"
4025 [(match_operand:V_MOV 0 "register_operand")
4026 (match_operand:DI 1 "register_operand")
4027 (match_operand:<VnSI> 2 "register_operand")
4028 (match_operand 3 "immediate_operand")
4029 (match_operand:SI 4 "gcn_alu_operand")
4030 (match_operand:DI 5 "")]
4033 rtx exec = force_reg (DImode, operands[5]);
4035 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
4036 operands[2], operands[4],
4037 INTVAL (operands[3]), exec);
4039 /* Masked lanes are required to hold zero. */
4040 emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
4042 if (GET_MODE (addr) == <VnDI>mode)
4043 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
4044 const0_rtx, const0_rtx,
4045 const0_rtx, operands[0],
4048 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
4050 const0_rtx, const0_rtx,
4051 operands[0], exec));
;; Masked scatter store.  Operand 0 is a DI register, operand 1 a <VnSI>
;; register, operand 2 an immediate, operand 3 an SI value, operand 4 the
;; data to store and operand 5 the mask.  Operands 0-3 are passed to
;; gcn_expand_scaled_offsets to form the address.  The one-offset scatter is
;; used when the computed address has <VnDI> mode, the two-offset scatter
;; (with operand 0 passed separately) otherwise.
4055 (define_expand "mask_scatter_store<mode><vnsi>"
4056 [(match_operand:DI 0 "register_operand")
4057 (match_operand:<VnSI> 1 "register_operand")
4058 (match_operand 2 "immediate_operand")
4059 (match_operand:SI 3 "gcn_alu_operand")
4060 (match_operand:V_MOV 4 "register_operand")
4061 (match_operand:DI 5 "")]
4064 rtx exec = force_reg (DImode, operands[5]);
4066 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
4067 operands[1], operands[3],
4068 INTVAL (operands[2]), exec);
4070 if (GET_MODE (addr) == <VnDI>mode)
4071 emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
4072 operands[4], const0_rtx,
4076 emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
4077 const0_rtx, operands[4],
4078 const0_rtx, const0_rtx,
;; Conditional arithmetic over plus, minus and mult.
4083 (define_code_iterator cond_op [plus minus mult])
;; Operand 1 is the DI mask, operands 2 and 3 the inputs and operand 4 the
;; value passed as the merge argument.  The mask and the first input are
;; forced into registers, then the operation's "_exec" pattern is emitted
;; directly into operand 0.
4085 (define_expand "cond_<expander><mode>"
4086 [(match_operand:V_ALL 0 "register_operand")
4087 (match_operand:DI 1 "register_operand")
4089 (match_operand:V_ALL 2 "gcn_alu_operand")
4090 (match_operand:V_ALL 3 "gcn_alu_operand"))
4091 (match_operand:V_ALL 4 "register_operand")]
4094 operands[1] = force_reg (DImode, operands[1]);
4095 operands[2] = force_reg (<MODE>mode, operands[2]);
4097 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
4098 operands[3], operands[4],
;; Conditional smin/smax on the V_FP modes, named through <fexpander>.
;; The code iterator cond_fminmaxop supplies the two operations.
;; Operands 1 (DImode) and 2 are forced into registers, then the matching
;; "<fexpander><mode>3_exec" insn is emitted with operand 0, operand 2,
;; operand 3 and operand 4 as its leading arguments.
;; NOTE(review): operand 1 is presumably the lane mask and operand 4 the
;; value kept in inactive lanes -- confirm against the cond_* pattern docs.
4103 (define_code_iterator cond_fminmaxop [smin smax])
4105 (define_expand "cond_<fexpander><mode>"
4106 [(match_operand:V_FP 0 "register_operand")
4107 (match_operand:DI 1 "register_operand")
4108 (cond_fminmaxop:V_FP
4109 (match_operand:V_FP 2 "gcn_alu_operand")
4110 (match_operand:V_FP 3 "gcn_alu_operand"))
4111 (match_operand:V_FP 4 "register_operand")]
4114 operands[1] = force_reg (DImode, operands[1]);
4115 operands[2] = force_reg (<MODE>mode, operands[2]);
4117 emit_insn (gen_<fexpander><mode>3_exec (operands[0], operands[2],
4118 operands[3], operands[4],
;; Conditional smin/smax/umin/umax on the V_INT modes.  Unlike the other
;; cond_* expanders in this group, this one works in two steps: the
;; "<expander><mode>3_exec" insn writes into a fresh temporary with an
;; undefined value (gcn_gen_undef) in the position where the others pass
;; operand 4, and a following vcond_mask_<mode>di insn produces operand 0
;; from that temporary and operand 4.
;; Operands 1 (DImode) and 2 are forced into registers first.
;; NOTE(review): operand 1 is presumably the lane mask that selects between
;; the temporary and operand 4 -- confirm; its use is not visible here.
4123 (define_code_iterator cond_minmaxop [smin smax umin umax])
4125 (define_expand "cond_<expander><mode>"
4126 [(match_operand:V_INT 0 "register_operand")
4127 (match_operand:DI 1 "register_operand")
4128 (cond_minmaxop:V_INT
4129 (match_operand:V_INT 2 "gcn_alu_operand")
4130 (match_operand:V_INT 3 "gcn_alu_operand"))
4131 (match_operand:V_INT 4 "register_operand")]
4134 operands[1] = force_reg (DImode, operands[1]);
4135 operands[2] = force_reg (<MODE>mode, operands[2]);
4136 rtx tmp = gen_reg_rtx (<MODE>mode);
4138 emit_insn (gen_<expander><mode>3_exec (tmp, operands[2], operands[3],
4139 gcn_gen_undef(<MODE>mode),
4141 emit_insn (gen_vcond_mask_<mode>di (operands[0], tmp, operands[4],
;; Conditional and/ior/xor on the V_INT modes.  The code iterator cond_bitop
;; supplies the three operations.  Operands 1 (DImode) and 2 are forced into
;; registers, then the matching "<expander><mode>3_exec" insn is emitted with
;; operand 0, operand 2, operand 3 and operand 4 as its leading arguments.
;; NOTE(review): operand 1 is presumably the lane mask and operand 4 the
;; value kept in inactive lanes -- confirm against the cond_* pattern docs.
4146 (define_code_iterator cond_bitop [and ior xor])
4148 (define_expand "cond_<expander><mode>"
4149 [(match_operand:V_INT 0 "register_operand")
4150 (match_operand:DI 1 "register_operand")
4152 (match_operand:V_INT 2 "gcn_alu_operand")
4153 (match_operand:V_INT 3 "gcn_alu_operand"))
4154 (match_operand:V_INT 4 "register_operand")]
4157 operands[1] = force_reg (DImode, operands[1]);
4158 operands[2] = force_reg (<MODE>mode, operands[2]);
4160 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
4161 operands[3], operands[4],
;; Conditional ashift/lshiftrt/ashiftrt on the V_INT_noHI modes.
;; Operands 1 (DImode) and 2 are forced into registers.  The shift amount
;; (operand 3, which arrives in the same vector mode as the data) is
;; converted into a fresh <VnSI>mode register with convert_move before the
;; "v<expander><mode>3_exec" insn is emitted with operand 0, operand 2, the
;; converted shift amount and operand 4 as its leading arguments.
;; NOTE(review): operand 1 is presumably the lane mask and operand 4 the
;; value kept in inactive lanes -- confirm against the cond_* pattern docs.
4166 (define_code_iterator cond_shiftop [ashift lshiftrt ashiftrt])
4168 (define_expand "cond_<expander><mode>"
4169 [(match_operand:V_INT_noHI 0 "register_operand")
4170 (match_operand:DI 1 "register_operand")
4171 (cond_shiftop:V_INT_noHI
4172 (match_operand:V_INT_noHI 2 "gcn_alu_operand")
4173 (match_operand:V_INT_noHI 3 "gcn_alu_operand"))
4174 (match_operand:V_INT_noHI 4 "register_operand")]
4177 operands[1] = force_reg (DImode, operands[1]);
4178 operands[2] = force_reg (<MODE>mode, operands[2]);
4180 rtx shiftby = gen_reg_rtx (<VnSI>mode);
4181 convert_move (shiftby, operands[3], 0);
4183 emit_insn (gen_v<expander><mode>3_exec (operands[0], operands[2],
4184 shiftby, operands[4],
4190 ;; {{{ Vector reductions
;; Int iterators over the UNSPEC_*_DPP_SHR codes used by the reduction
;; patterns below.  REDUC_2REG_UNSPEC is the list used by the V_DI
;; insn_and_split variant of "*<reduc_op>_dpp_shr_<mode>".
4192 (define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
4193 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
4196 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
4198 (define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
4200 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
;; Maps each reduction unspec to its own name as text, so that
;; "<reduc_unspec>" can be written inside C code in the patterns below and
;; expand to the unspec identifier itself.
4202 ; FIXME: Isn't there a better way of doing this?
4203 (define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
4204 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
4205 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
4206 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
4207 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
4208 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
4209 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
4210 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
;; Maps each reduction unspec to the lower-case operation name that is
;; substituted for "<reduc_op>" in the pattern names below.
4212 (define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
4213 (UNSPEC_SMAX_DPP_SHR "smax")
4214 (UNSPEC_UMIN_DPP_SHR "umin")
4215 (UNSPEC_UMAX_DPP_SHR "umax")
4216 (UNSPEC_PLUS_DPP_SHR "plus")
4217 (UNSPEC_AND_DPP_SHR "and")
4218 (UNSPEC_IOR_DPP_SHR "ior")
4219 (UNSPEC_XOR_DPP_SHR "xor")])
;; Maps each reduction unspec to the instruction template string that the
;; single-register DPP pattern passes to gcn_expand_dpp_shr_insn as
;; "<reduc_insn>".
;; NOTE(review): the %i0, %u0, %U0 and %B0 suffixes are presumably
;; mode-dependent output modifiers -- confirm against the backend's
;; operand-printing code.
4221 (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
4222 (UNSPEC_SMAX_DPP_SHR "v_max%i0")
4223 (UNSPEC_UMIN_DPP_SHR "v_min%u0")
4224 (UNSPEC_UMAX_DPP_SHR "v_max%u0")
4225 (UNSPEC_PLUS_DPP_SHR "v_add%U0")
4226 (UNSPEC_AND_DPP_SHR "v_and%B0")
4227 (UNSPEC_IOR_DPP_SHR "v_or%B0")
4228 (UNSPEC_XOR_DPP_SHR "v_xor%B0")])
;; Reduce the V_ALL vector in operand 1 to a <SCALAR_MODE> value in
;; operand 0.  Only enabled when TARGET_WAVE64_COMPAT is false.
;; gcn_expand_reduc_scalar returns a vector register; the scalar result is
;; then read from its last lane (index GET_MODE_NUNITS - 1) with
;; vec_extract<mode><scalar_mode>.
4230 (define_expand "reduc_<reduc_op>_scal_<mode>"
4231 [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
4232 (unspec:<SCALAR_MODE>
4233 [(match_operand:V_ALL 1 "register_operand")]
4235 "!TARGET_WAVE64_COMPAT"
4237 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
4240 rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
4241 emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
;; Floating-point min/max reductions on the V_FP modes, named through
;; <fexpander>.  Only enabled when TARGET_WAVE64_COMPAT is false.
;; The expander simply forwards both operands to the
;; "reduc_<expander>_scal_<mode>" expander.
4247 (define_expand "reduc_<fexpander>_scal_<mode>"
4248 [(match_operand:<SCALAR_MODE> 0 "register_operand")
4250 (match_operand:V_FP 1 "register_operand"))]
4251 "!TARGET_WAVE64_COMPAT"
4253 /* fmin/fmax are identical to smin/smax. */
4254 emit_insn (gen_reduc_<expander>_scal_<mode> (operands[0], operands[1]));
4258 ;; Warning: This "-ffast-math" implementation converts in-order reductions
4259 ;; into associative reductions. It's also used where OpenMP or
4260 ;; OpenACC parallelization has already broken the in-order semantics.
;; Expander for "fold_left_plus_<mode>" on the V_FP modes.
;; Enabled only when TARGET_WAVE64_COMPAT is false, pseudo registers can
;; still be created, and one of flag_openacc, flag_openmp or
;; flag_associative_math is set.
;; The vector in operand 2 is reduced with reduc_plus_scal_<mode> into a
;; fresh scalar temporary, which is then added to the scalar in operand 1 to
;; give operand 0.
4261 (define_expand "fold_left_plus_<mode>"
4262 [(match_operand:<SCALAR_MODE> 0 "register_operand")
4263 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
4264 (match_operand:V_FP 2 "gcn_alu_operand")]
4265 "!TARGET_WAVE64_COMPAT
4266 && can_create_pseudo_p ()
4267 && (flag_openacc || flag_openmp
4268 || flag_associative_math)"
4270 rtx dest = operands[0];
4271 rtx scalar = operands[1];
4272 rtx vector = operands[2];
4273 rtx tmp = gen_reg_rtx (<SCALAR_MODE>mode);
4275 emit_insn (gen_reduc_plus_scal_<mode> (tmp, vector));
4276 emit_insn (gen_add<scalar_mode>3 (dest, scalar, tmp));
;; Single-register (V_1REG) DPP reduction step.  Operands 1 and 2 are the
;; vector inputs (VGPR constraint "v") and operand 3 is a constant integer.
;; The output template is built by gcn_expand_dpp_shr_insn from the vector
;; mode, the "<reduc_insn>" mnemonic template, the unspec code and the
;; integer value of operand 3.
;; NOTE(review): operand 3 is presumably the lane shift distance -- confirm
;; against gcn_expand_dpp_shr_insn.
4280 (define_insn "*<reduc_op>_dpp_shr_<mode>"
4281 [(set (match_operand:V_1REG 0 "register_operand" "=v")
4283 [(match_operand:V_1REG 1 "register_operand" "v")
4284 (match_operand:V_1REG 2 "register_operand" "v")
4285 (match_operand:SI 3 "const_int_operand" "n")]
4289 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
4290 <reduc_unspec>, INTVAL (operands[3]));
4292 [(set_attr "type" "vop_dpp")
4293 (set_attr "length" "8")])
;; V_DI variant of the DPP reduction step, for the REDUC_2REG_UNSPEC codes.
;; The insn is split into two operations with the same unspec: one on
;; part 0 and one on part 1 of each vector operand, as returned by
;; gcn_operand_part.  Operand 3 is shared unchanged by both halves.
;; Operands 4/5 are the two parts of the destination, 6/7 of operand 1 and
;; 8/9 of operand 2.
4295 (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
4296 [(set (match_operand:V_DI 0 "register_operand" "=v")
4298 [(match_operand:V_DI 1 "register_operand" "v")
4299 (match_operand:V_DI 2 "register_operand" "v")
4300 (match_operand:SI 3 "const_int_operand" "n")]
4301 REDUC_2REG_UNSPEC))]
4307 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
4310 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
4312 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
4313 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
4314 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
4315 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
4316 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
4317 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
4319 [(set_attr "type" "vmult")
4320 (set_attr "length" "16")])
4322 ; Special cases for addition.
;; DPP addition step for the V_INT_1REG modes using UNSPEC_PLUS_CARRY_DPP_SHR.
;; The pattern clobbers VCC_REG.  The output template is built by
;; gcn_expand_dpp_shr_insn with the "v_add_co_u32" mnemonic; note that the
;; mode passed is <VnSI>mode rather than the pattern's own mode.
;; NOTE(review): VCC presumably receives the carry-out that the carry-in
;; pattern consumes -- confirm.
4324 (define_insn "*plus_carry_dpp_shr_<mode>"
4325 [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
4327 [(match_operand:V_INT_1REG 1 "register_operand" "v")
4328 (match_operand:V_INT_1REG 2 "register_operand" "v")
4329 (match_operand:SI 3 "const_int_operand" "n")]
4330 UNSPEC_PLUS_CARRY_DPP_SHR))
4331 (clobber (reg:DI VCC_REG))]
4334 return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add_co_u32",
4335 UNSPEC_PLUS_CARRY_DPP_SHR,
4336 INTVAL (operands[3]));
4338 [(set_attr "type" "vop_dpp")
4339 (set_attr "length" "8")])
;; DPP addition step for the V_SI modes using UNSPEC_PLUS_CARRY_IN_DPP_SHR.
;; Compared with the carry-out pattern it takes an additional DImode input,
;; operand 4 (constraint "cV"), and it also clobbers VCC_REG.  The output
;; template is built by gcn_expand_dpp_shr_insn with the "v_addc_co_u32"
;; mnemonic.
;; NOTE(review): operand 4 is presumably the incoming carry held in VCC --
;; confirm against the "cV" constraint definition.
4341 (define_insn "*plus_carry_in_dpp_shr_<mode>"
4342 [(set (match_operand:V_SI 0 "register_operand" "=v")
4344 [(match_operand:V_SI 1 "register_operand" "v")
4345 (match_operand:V_SI 2 "register_operand" "v")
4346 (match_operand:SI 3 "const_int_operand" "n")
4347 (match_operand:DI 4 "register_operand" "cV")]
4348 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4349 (clobber (reg:DI VCC_REG))]
4352 return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc_co_u32",
4353 UNSPEC_PLUS_CARRY_IN_DPP_SHR,
4354 INTVAL (operands[3]));
4356 [(set_attr "type" "vop_dpp")
4357 (set_attr "length" "8")])
;; V_DI variant of the DPP addition step.  It is split into two parallels:
;; the first applies UNSPEC_PLUS_CARRY_DPP_SHR to part 0 of the operands,
;; the second applies UNSPEC_PLUS_CARRY_IN_DPP_SHR to part 1 and additionally
;; reads (reg:DI VCC_REG).  Both halves clobber VCC_REG, and operand 3 is
;; shared unchanged.  Operands 4/5 are the two parts of the destination,
;; 6/7 of operand 1 and 8/9 of operand 2, as returned by gcn_operand_part.
4359 (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
4360 [(set (match_operand:V_DI 0 "register_operand" "=v")
4362 [(match_operand:V_DI 1 "register_operand" "v")
4363 (match_operand:V_DI 2 "register_operand" "v")
4364 (match_operand:SI 3 "const_int_operand" "n")]
4365 UNSPEC_PLUS_CARRY_DPP_SHR))
4366 (clobber (reg:DI VCC_REG))]
4370 [(parallel [(set (match_dup 4)
4372 [(match_dup 6) (match_dup 8) (match_dup 3)]
4373 UNSPEC_PLUS_CARRY_DPP_SHR))
4374 (clobber (reg:DI VCC_REG))])
4375 (parallel [(set (match_dup 5)
4377 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
4378 UNSPEC_PLUS_CARRY_IN_DPP_SHR))
4379 (clobber (reg:DI VCC_REG))])]
4381 operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
4382 operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
4383 operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
4384 operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
4385 operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
4386 operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
4388 [(set_attr "type" "vmult")
4389 (set_attr "length" "16")])
4392 ;; {{{ Miscellaneous
;; Expander for "vec_series<mode>" on the V_SI modes.  The result is built
;; as (v1 * operand 2) + operand 1, where v1 is the hard register
;; VGPR_REGNO (1) viewed in the vector mode and both scalar operands go
;; through the "_dup" forms of the multiply and add patterns.
;; NOTE(review): VGPR 1 presumably holds the per-lane index, making
;; operand 1 the base and operand 2 the step -- confirm.
4394 (define_expand "vec_series<mode>"
4395 [(match_operand:V_SI 0 "register_operand")
4396 (match_operand:SI 1 "gcn_alu_operand")
4397 (match_operand:SI 2 "gcn_alu_operand")]
4400 rtx tmp = gen_reg_rtx (<MODE>mode);
4401 rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
4403 emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
4404 emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
;; Expander for "vec_series<mode>" on the V_DI modes.  The hard register
;; VGPR_REGNO (1) is read in <VnSI>mode and multiplied by operand 2 using
;; the "mul<mode>3_zext_dup2" pattern.  Operand 1 is broadcast into a fresh
;; vector register with vec_duplicate<mode>, and the two vectors are added
;; to give operand 0.
;; NOTE(review): VGPR 1 presumably holds the per-lane index, making
;; operand 1 the base and operand 2 the step -- confirm.
4408 (define_expand "vec_series<mode>"
4409 [(match_operand:V_DI 0 "register_operand")
4410 (match_operand:DI 1 "gcn_alu_operand")
4411 (match_operand:DI 2 "gcn_alu_operand")]
4414 rtx tmp = gen_reg_rtx (<MODE>mode);
4415 rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
4416 rtx op1vec = gen_reg_rtx (<MODE>mode);
4418 emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
4419 emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
4420 emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));