1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2025 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
35 UNSPEC_FPINT_NEARBYINT
54 (define_c_enum "unspecv" [
65 UNSPECV_UNIFORM_WARP_CHECK
;; Insn attribute "subregs_ok", with values "false" and "true".  An insn
;; that does not set it explicitly gets "false"; patterns opt in with
;; (set_attr "subregs_ok" "true").
;; NOTE(review): presumably marks insns whose operands may be subregs --
;; confirm against the backend code that reads this attribute.
84 (define_attr "subregs_ok" "false,true"
85 (const_string "false"))
;; Insn attribute "atomic", with values "false" and "true".  An insn that
;; does not set it explicitly gets "false".
87 (define_attr "atomic" "false,true"
88 (const_string "false"))
90 ;; The nvptx operand predicates, in general, don't permit subregs and
91 ;; accept only literal constants.  This differs from the generic ones,
92 ;; which permit subregs and symbolic constants (as appropriate).
93 (define_predicate "nvptx_register_operand"
96 return register_operand (op, mode);
99 (define_predicate "nvptx_register_or_complex_di_df_register_operand"
100 (ior (match_code "reg")
101 (match_code "concat"))
103 if (GET_CODE (op) == CONCAT)
104 return ((GET_MODE (op) == DCmode || GET_MODE (op) == CDImode)
105 && nvptx_register_operand (XEXP (op, 0), mode)
106 && nvptx_register_operand (XEXP (op, 1), mode));
108 return nvptx_register_operand (op, mode);
111 (define_predicate "nvptx_nonimmediate_operand"
112 (match_code "mem,reg")
114 return (REG_P (op) ? register_operand (op, mode)
115 : memory_operand (op, mode));
118 (define_predicate "nvptx_nonmemory_operand"
119 (match_code "reg,const_int,const_double")
121 return (REG_P (op) ? register_operand (op, mode)
122 : immediate_operand (op, mode));
;; Matches a CONST_INT that is the shared zero constant (const0_rtx).
125 (define_predicate "const0_operand"
126 (and (match_code "const_int")
127 (match_test "op == const0_rtx")))
129 ;; True if this operator is valid for predication.
;; Only the EQ and NE rtx codes are accepted.
130 (define_predicate "predicate_operator"
131 (match_code "eq,ne"))
133 (define_predicate "ne_operator"
;; Matches the comparison codes used with integer-mode operands:
;; equality (eq, ne), signed ordering (le, ge, lt, gt) and unsigned
;; ordering (leu, geu, ltu, gtu).
136 (define_predicate "nvptx_comparison_operator"
137 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
;; Matches the comparison codes used with floating-point operands: the
;; plain comparisons (eq, ne, le, ge, lt, gt), their "un"-prefixed
;; variants (uneq, unle, unge, unlt, ungt), and the unordered/ordered
;; tests.  No unsigned codes are accepted here.
139 (define_predicate "nvptx_float_comparison_operator"
140 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
;; Matches a CONST_INT whose value, read as unsigned via UINTVAL, is
;; below 4 (i.e. 0..3); the unsigned read also rejects negative values.
142 (define_predicate "nvptx_vector_index_operand"
143 (and (match_code "const_int")
144 (match_test "UINTVAL (op) < 4")))
146 ;; Test for a valid operand for a call instruction.
147 (define_predicate "call_insn_operand"
148 (match_code "symbol_ref,reg")
150 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
153 ;; Return true if OP is a call with parallel USEs of the argument
155 (define_predicate "call_operation"
156 (match_code "parallel")
158 int arg_end = XVECLEN (op, 0);
160 for (int i = 1; i < arg_end; i++)
162 rtx elt = XVECEXP (op, 0, i);
164 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
170 ;; Test for a function symbol ref operand
171 (define_predicate "symbol_ref_function_operand"
172 (match_code "symbol_ref")
174 return SYMBOL_REF_FUNCTION_P (op);
;; Insn attribute "predicable", with values "no" and "yes".  The default
;; is "yes", so an insn is predicable unless its pattern says otherwise;
;; the branch patterns in this file set it to "no".
177 (define_attr "predicable" "no,yes"
178 (const_string "yes"))
181 [(match_operator 0 "predicate_operator"
182 [(match_operand:BI 1 "nvptx_register_operand" "")
183 (match_operand:BI 2 "const0_operand" "")])]
;; Constraint "P0": a CONST_INT equal to zero.  The BImode move pattern
;; uses it to select the alternative that materializes a false predicate.
188 (define_constraint "P0"
189 "An integer with the value 0."
190 (and (match_code "const_int")
191 (match_test "ival == 0")))
;; Constraint "P1": a CONST_INT equal to one.  The BImode move pattern
;; uses it to select the alternative that materializes a true predicate.
193 (define_constraint "P1"
194 "An integer with the value 1."
195 (and (match_code "const_int")
196 (match_test "ival == 1")))
;; Constraint "Pn": a CONST_INT equal to minus one.
198 (define_constraint "Pn"
199 "An integer with the value -1."
200 (and (match_code "const_int")
201 (match_test "ival == -1")))
203 (define_constraint "R"
;; Constraint "Ia": accepts every CONST_INT.  The match_test is always
;; true, so the code check alone decides the match.
207 (define_constraint "Ia"
208 "Any integer constant."
209 (and (match_code "const_int") (match_test "true")))
;; Mode iterators.  Each name spells out the modes it expands over:
;;  - names ending in "IM" list integer modes, with Q/H/S/D standing for
;;    QI/HI/SI/DI and a leading B adding BI;
;;  - SDFM is the float pair SF/DF, HSFM is HF/SF, SDCM is the complex
;;    pair SC/DC;
;;  - QHSDISDFM and SDISDFM combine an integer list with SF/DF;
;;  - BITS and BITD each pair an integer mode with a float mode
;;    (SI/SF and DI/DF);
;;  - VECIM covers the two-element integer vector modes V2SI and V2DI.
211 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
212 (define_mode_iterator QHSDIM [QI HI SI DI])
213 (define_mode_iterator HSDIM [HI SI DI])
214 (define_mode_iterator BHSDIM [BI HI SI DI])
215 (define_mode_iterator SDIM [SI DI])
216 (define_mode_iterator SDISDFM [SI DI SF DF])
217 (define_mode_iterator QHIM [QI HI])
218 (define_mode_iterator QHSIM [QI HI SI])
219 (define_mode_iterator SDFM [SF DF])
220 (define_mode_iterator HSFM [HF SF])
221 (define_mode_iterator SDCM [SC DC])
222 (define_mode_iterator BITS [SI SF])
223 (define_mode_iterator BITD [DI DF])
224 (define_mode_iterator VECIM [V2SI V2DI])
226 ;; This mode iterator allows :P to be used for patterns that operate on
227 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; Each entry carries a condition on Pmode, so only the SI or the DI
;; variant of a :P pattern is enabled.  The call patterns use :P for the
;; callee address operand.
228 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
230 ;; Define element mode for each vector mode.
;; VECELEM yields the element mode name in upper case (SI, DI) and
;; Vecelem the same name in lower case (si, di), for V2SI and V2DI.
231 (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
232 (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
234 ;; We should get away with not defining memory alternatives, since we don't
235 ;; get variables in this mode and pseudos are never spilled.
237 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
238 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
241 %.\\tmov%t0\\t%0, %1;
242 %.\\tsetp.eq.u32\\t%0, 1, 0;
243 %.\\tsetp.eq.u32\\t%0, 1, 1;")
245 (define_insn "*mov<mode>_insn"
246 [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
247 (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
248 "!MEM_P (operands[0]) || REG_P (operands[1])"
250 if (which_alternative == 1)
251 return "%.\\tld%A1%u1\\t%0, %1;";
252 if (which_alternative == 2)
253 return "%.\\tst%A0%u0\\t%0, %1;";
255 return nvptx_output_mov_insn (operands[0], operands[1]);
257 [(set_attr "subregs_ok" "true")])
259 (define_insn "*mov<mode>_insn"
260 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
261 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
262 "!MEM_P (operands[0]) || REG_P (operands[1])"
264 if (which_alternative == 1)
265 return "%.\\tld%A1%u1\\t%0, %1;";
266 if (which_alternative == 2)
267 return "%.\\tst%A0%u0\\t%0, %1;";
269 return nvptx_output_mov_insn (operands[0], operands[1]);
271 [(set_attr "subregs_ok" "true")])
273 ;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
275 [(set (match_operand:DI 0 "nvptx_register_operand")
276 (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
277 (match_operand 2 "const_int_operand"))))]
279 [(set (match_dup 0) (match_dup 1))
280 (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
284 (define_insn "*mov<mode>_insn"
285 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
286 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
287 "!MEM_P (operands[0]) || REG_P (operands[1])"
289 if (which_alternative == 1)
290 return "%.\\tld%A1%u0\\t%0, %1;";
291 if (which_alternative == 2)
292 return "%.\\tst%A0%u1\\t%0, %1;";
294 return nvptx_output_mov_insn (operands[0], operands[1]);
296 [(set_attr "subregs_ok" "true")])
298 (define_insn "*movhf_insn"
299 [(set (match_operand:HF 0 "nonimmediate_operand" "=R,R,m")
300 (match_operand:HF 1 "nonimmediate_operand" "R,m,R"))]
301 "!MEM_P (operands[0]) || REG_P (operands[1])"
303 %.\\tmov.b16\\t%0, %1;
304 %.\\tld.b16\\t%0, %1;
305 %.\\tst.b16\\t%0, %1;"
306 [(set_attr "subregs_ok" "true")])
308 (define_expand "movhf"
309 [(set (match_operand:HF 0 "nonimmediate_operand" "")
310 (match_operand:HF 1 "nonimmediate_operand" ""))]
313 /* Load HFmode constants as SFmode with an explicit FLOAT_TRUNCATE. */
314 if (CONST_DOUBLE_P (operands[1]))
316 rtx tmp1 = gen_reg_rtx (SFmode);
317 REAL_VALUE_TYPE d = *CONST_DOUBLE_REAL_VALUE (operands[1]);
318 real_convert (&d, SFmode, &d);
319 emit_move_insn (tmp1, const_double_from_real_value (d, SFmode));
321 if (!REG_P (operands[0]))
323 rtx tmp2 = gen_reg_rtx (HFmode);
324 emit_insn (gen_truncsfhf2 (tmp2, tmp1));
325 emit_move_insn (operands[0], tmp2);
328 emit_insn (gen_truncsfhf2 (operands[0], tmp1));
332 if (MEM_P (operands[0]) && !REG_P (operands[1]))
334 rtx tmp = gen_reg_rtx (HFmode);
335 emit_move_insn (tmp, operands[1]);
336 emit_move_insn (operands[0], tmp);
341 (define_insn "load_arg_reg<mode>"
342 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
343 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
346 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
348 (define_insn "load_arg_reg<mode>"
349 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
350 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
353 "%.\\tmov%t0\\t%0, %%ar%1;")
355 (define_expand "mov<mode>"
356 [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
357 (match_operand:VECIM 1 "general_operand" ""))]
360 if (MEM_P (operands[0]) && !REG_P (operands[1]))
362 rtx tmp = gen_reg_rtx (<MODE>mode);
363 emit_move_insn (tmp, operands[1]);
364 emit_move_insn (operands[0], tmp);
369 (define_expand "mov<mode>"
370 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
371 (match_operand:QHSDISDFM 1 "general_operand" ""))]
374 if (MEM_P (operands[0]) && !REG_P (operands[1]))
376 rtx tmp = gen_reg_rtx (<MODE>mode);
377 emit_move_insn (tmp, operands[1]);
378 emit_move_insn (operands[0], tmp);
382 if (GET_CODE (operands[1]) == LABEL_REF)
383 sorry ("target cannot support label values");
386 (define_insn "zero_extendqihi2"
387 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
388 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
391 %.\\tcvt.u16.u%T1\\t%0, %1;
392 %.\\tld%A1.u8\\t%0, %1;"
393 [(set_attr "subregs_ok" "true")])
395 (define_insn "zero_extend<mode>si2"
396 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
397 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
400 %.\\tcvt.u32.u%T1\\t%0, %1;
401 %.\\tld%A1.u%T1\\t%0, %1;"
402 [(set_attr "subregs_ok" "true")])
404 (define_insn "zero_extend<mode>di2"
405 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
406 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
409 %.\\tcvt.u64.u%T1\\t%0, %1;
410 %.\\tld%A1%u1\\t%0, %1;"
411 [(set_attr "subregs_ok" "true")])
413 (define_insn "extendqihi2"
414 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
415 (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
417 "%.\\tcvt.s16.s8\\t%0, %1;"
418 [(set_attr "subregs_ok" "true")])
420 (define_insn "extend<mode>si2"
421 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
422 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
425 %.\\tcvt.s32.s%T1\\t%0, %1;
426 %.\\tld%A1.s%T1\\t%0, %1;"
427 [(set_attr "subregs_ok" "true")])
429 (define_insn "extend<mode>di2"
430 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
431 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
434 %.\\tcvt.s64.s%T1\\t%0, %1;
435 %.\\tld%A1.s%T1\\t%0, %1;"
436 [(set_attr "subregs_ok" "true")])
438 (define_insn "trunchiqi2"
439 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
440 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
443 %.\\tcvt%t0.u16\\t%0, %1;
444 %.\\tst%A0.u8\\t%0, %1;"
445 [(set_attr "subregs_ok" "true")])
447 (define_insn "truncsi<mode>2"
448 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
449 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
452 if (which_alternative == 1)
453 return "%.\\tst%A0.u%T0\\t%0, %1;";
454 if (GET_MODE (operands[0]) == QImode)
455 return "%.\\tmov%t0\\t%0, %1;";
456 return "%.\\tcvt%t0.u32\\t%0, %1;";
458 [(set_attr "subregs_ok" "true")])
460 (define_insn "truncdi<mode>2"
461 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
462 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
465 %.\\tcvt%t0.u64\\t%0, %1;
466 %.\\tst%A0.u%T0\\t%0, %1;"
467 [(set_attr "subregs_ok" "true")])
469 ;; Sign-extensions of truncations
471 (define_insn "*extend_trunc_<mode>2_qi"
472 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
474 (truncate:QI (match_operand:HSDIM 1 "nvptx_register_operand" "R"))))]
476 "%.\\tcvt.s%T0.s8\\t%0, %1;"
477 [(set_attr "subregs_ok" "true")])
479 (define_insn "*extend_trunc_<mode>2_hi"
480 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
482 (truncate:HI (match_operand:SDIM 1 "nvptx_register_operand" "R"))))]
484 "%.\\tcvt.s%T0.s16\\t%0, %1;"
485 [(set_attr "subregs_ok" "true")])
487 (define_insn "*extend_trunc_di2_si"
488 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
490 (truncate:SI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
492 "%.\\tcvt.s64.s32\\t%0, %1;"
493 [(set_attr "subregs_ok" "true")])
495 ;; Integer arithmetic
497 (define_insn "add<mode>3"
498 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
499 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
500 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
502 "%.\\tadd%t0\\t%0, %1, %2;")
504 (define_insn "*vadd_addsi4"
505 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
506 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
507 (match_operand:SI 2 "nvptx_register_operand" "R"))
508 (match_operand:SI 3 "nvptx_register_operand" "R")))]
510 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
512 (define_insn "*vsub_addsi4"
513 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
514 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
515 (match_operand:SI 2 "nvptx_register_operand" "R"))
516 (match_operand:SI 3 "nvptx_register_operand" "R")))]
518 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
520 (define_insn "sub<mode>3"
521 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
522 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
523 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
526 if (GET_MODE (operands[0]) == HImode)
527 /* Workaround https://developer.nvidia.com/nvidia_bug/3527713.
529 return "%.\\tsub.s16\\t%0, %1, %2;";
531 return "%.\\tsub%t0\\t%0, %1, %2;";
534 (define_insn "mul<mode>3"
535 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
536 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
537 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
539 "%.\\tmul.lo%t0\\t%0, %1, %2;")
541 (define_insn "*mad<mode>3"
542 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
543 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
544 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
545 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
547 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
549 (define_insn "div<mode>3"
550 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
551 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
552 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
554 "%.\\tdiv.s%T0\\t%0, %1, %2;")
556 (define_insn "udiv<mode>3"
557 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
558 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
559 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
561 "%.\\tdiv.u%T0\\t%0, %1, %2;")
563 (define_insn "mod<mode>3"
564 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
565 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
566 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
568 "%.\\trem.s%T0\\t%0, %1, %2;")
570 (define_insn "umod<mode>3"
571 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
572 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
573 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
575 "%.\\trem.u%T0\\t%0, %1, %2;")
577 (define_insn "smin<mode>3"
578 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
579 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
580 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
582 "%.\\tmin.s%T0\\t%0, %1, %2;")
584 (define_insn "umin<mode>3"
585 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
586 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
587 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
589 "%.\\tmin.u%T0\\t%0, %1, %2;")
591 (define_insn "smax<mode>3"
592 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
593 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
594 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
596 "%.\\tmax.s%T0\\t%0, %1, %2;")
598 (define_insn "umax<mode>3"
599 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
600 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
601 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
603 "%.\\tmax.u%T0\\t%0, %1, %2;")
605 (define_insn "abs<mode>2"
606 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
607 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
609 "%.\\tabs.s%T0\\t%0, %1;")
611 (define_insn "neg<mode>2"
612 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
613 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
615 "%.\\tneg.s%T0\\t%0, %1;")
617 (define_insn "one_cmpl<mode>2"
618 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
619 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
621 "%.\\tnot.b%T0\\t%0, %1;")
623 (define_insn "one_cmplbi2"
624 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
625 (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))]
627 "%.\\tnot.pred\\t%0, %1;")
629 (define_insn "*cnot<mode>2"
630 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
631 (eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
634 "%.\\tcnot.b%T0\\t%0, %1;")
636 (define_insn "bitrev<mode>2"
637 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
638 (bitreverse:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
640 "%.\\tbrev.b%T0\\t%0, %1;")
642 (define_insn "clz<mode>2"
643 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
644 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
646 "%.\\tclz.b%T1\\t%0, %1;")
648 (define_expand "ctz<mode>2"
649 [(set (match_operand:SI 0 "nvptx_register_operand" "")
650 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
653 rtx tmpreg = gen_reg_rtx (<MODE>mode);
654 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
655 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
659 (define_insn "popcountsi2"
660 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
661 (popcount:SI (match_operand:SI 1 "nvptx_register_operand" "R")))]
663 "%.\\tpopc.b32\\t%0, %1;")
665 (define_insn "popcountdi2"
666 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
668 (popcount:DI (match_operand:DI 1 "nvptx_register_operand" "R"))))]
670 "%.\\tpopc.b64\\t%0, %1;")
672 ;; Multiplication variants
674 (define_insn "mulhisi3"
675 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
676 (mult:SI (sign_extend:SI
677 (match_operand:HI 1 "nvptx_register_operand" "R"))
679 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
681 "%.\\tmul.wide.s16\\t%0, %1, %2;")
683 (define_insn "mulsidi3"
684 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
685 (mult:DI (sign_extend:DI
686 (match_operand:SI 1 "nvptx_register_operand" "R"))
688 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
690 "%.\\tmul.wide.s32\\t%0, %1, %2;")
692 (define_insn "umulhisi3"
693 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
694 (mult:SI (zero_extend:SI
695 (match_operand:HI 1 "nvptx_register_operand" "R"))
697 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
699 "%.\\tmul.wide.u16\\t%0, %1, %2;")
701 (define_insn "umulsidi3"
702 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
703 (mult:DI (zero_extend:DI
704 (match_operand:SI 1 "nvptx_register_operand" "R"))
706 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
708 "%.\\tmul.wide.u32\\t%0, %1, %2;")
710 (define_expand "mulditi3"
711 [(set (match_operand:TI 0 "nvptx_register_operand")
712 (mult:TI (sign_extend:TI
713 (match_operand:DI 1 "nvptx_register_operand"))
715 (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
718 rtx hi = gen_reg_rtx (DImode);
719 rtx lo = gen_reg_rtx (DImode);
720 emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2]));
721 emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
722 emit_move_insn (gen_highpart (DImode, operands[0]), hi);
723 emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
727 (define_expand "umulditi3"
728 [(set (match_operand:TI 0 "nvptx_register_operand")
729 (mult:TI (zero_extend:TI
730 (match_operand:DI 1 "nvptx_register_operand"))
732 (match_operand:DI 2 "nvptx_nonmemory_operand"))))]
735 rtx hi = gen_reg_rtx (DImode);
736 rtx lo = gen_reg_rtx (DImode);
737 emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2]));
738 emit_insn (gen_muldi3 (lo, operands[1], operands[2]));
739 emit_move_insn (gen_highpart (DImode, operands[0]), hi);
740 emit_move_insn (gen_lowpart (DImode, operands[0]), lo);
744 (define_insn "smul<mode>3_highpart"
745 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
747 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
748 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
750 "%.\\tmul.hi.s%T0\\t%0, %1, %2;")
752 (define_insn "umul<mode>3_highpart"
753 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
755 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
756 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
758 "%.\\tmul.hi.u%T0\\t%0, %1, %2;")
760 (define_insn "*smulhi3_highpart_2"
761 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
764 (mult:SI (sign_extend:SI
765 (match_operand:HI 1 "nvptx_register_operand" "R"))
767 (match_operand:HI 2 "nvptx_register_operand" "R")))
770 "%.\\tmul.hi.s16\\t%0, %1, %2;")
772 (define_insn "*smulsi3_highpart_2"
773 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
776 (mult:DI (sign_extend:DI
777 (match_operand:SI 1 "nvptx_register_operand" "R"))
779 (match_operand:SI 2 "nvptx_register_operand" "R")))
782 "%.\\tmul.hi.s32\\t%0, %1, %2;")
784 (define_insn "*umulhi3_highpart_2"
785 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
788 (mult:SI (zero_extend:SI
789 (match_operand:HI 1 "nvptx_register_operand" "R"))
791 (match_operand:HI 2 "nvptx_register_operand" "R")))
794 "%.\\tmul.hi.u16\\t%0, %1, %2;")
796 (define_insn "*umulsi3_highpart_2"
797 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
800 (mult:DI (zero_extend:DI
801 (match_operand:SI 1 "nvptx_register_operand" "R"))
803 (match_operand:SI 2 "nvptx_register_operand" "R")))
806 "%.\\tmul.hi.u32\\t%0, %1, %2;")
810 (define_insn "ashl<mode>3"
811 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
812 (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
813 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
815 "%.\\tshl.b%T0\\t%0, %1, %2;")
817 (define_insn "ashr<mode>3"
818 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
819 (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
820 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
822 "%.\\tshr.s%T0\\t%0, %1, %2;")
824 (define_insn "lshr<mode>3"
825 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
826 (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
827 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
829 "%.\\tshr.u%T0\\t%0, %1, %2;")
831 (define_insn "rotlsi3"
832 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
833 (rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R")
834 (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
837 "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;")
839 (define_insn "rotrsi3"
840 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
841 (rotatert:SI (match_operand:SI 1 "nvptx_register_operand" "R")
842 (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
845 "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;")
847 ;; Logical operations
;; any_logic iterates over the three bitwise rtx codes AND, IOR and XOR.
849 (define_code_iterator any_logic [and ior xor])
;; <logic> is the mnemonic placed in the output template; note that IOR
;; maps to "or".
850 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
;; <ilogic> is the spelling used inside pattern names (e.g.
;; "<ilogic><mode>3"); here IOR stays "ior".
851 (define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")])
853 (define_insn "<ilogic><mode>3"
854 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
856 (match_operand:HSDIM 1 "nvptx_register_operand" "R")
857 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
859 "%.\\t<logic>.b%T0\\t%0, %1, %2;")
861 (define_insn "<ilogic>bi3"
862 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
863 (any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R")
864 (match_operand:BI 2 "nvptx_register_operand" "R")))]
866 "%.\\t<logic>.pred\\t%0, %1, %2;")
869 [(set (match_operand:HSDIM 0 "nvptx_register_operand")
871 (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand")
873 (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand")
875 "can_create_pseudo_p ()"
876 [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2)))
877 (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))]
879 operands[3] = gen_reg_rtx (BImode);
882 ;; Comparisons and branches
884 (define_insn "cmp<mode>"
885 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
886 (match_operator:BI 1 "nvptx_comparison_operator"
887 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
888 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
890 "%.\\tsetp%c1\\t%0, %2, %3;")
892 (define_insn "*cmp<mode>"
893 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
894 (match_operator:BI 1 "nvptx_float_comparison_operator"
895 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
896 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
898 "%.\\tsetp%c1\\t%0, %2, %3;")
900 (define_insn "*cmphf"
901 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
902 (match_operator:BI 1 "nvptx_float_comparison_operator"
903 [(match_operand:HF 2 "nvptx_register_operand" "R")
904 (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))]
906 "%.\\tsetp%c1\\t%0, %2, %3;")
910 (label_ref (match_operand 0 "" "")))]
914 (define_insn "br_true"
916 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
918 (label_ref (match_operand 1 "" ""))
922 [(set_attr "predicable" "no")])
924 (define_insn "br_false"
926 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
928 (label_ref (match_operand 1 "" ""))
932 [(set_attr "predicable" "no")])
934 ;; unified conditional branch
935 (define_insn "br_true_uni"
936 [(set (pc) (if_then_else
937 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
938 UNSPEC_BR_UNIFIED) (const_int 0))
939 (label_ref (match_operand 1 "" "")) (pc)))]
941 "%j0\\tbra.uni\\t%l1;"
942 [(set_attr "predicable" "no")])
944 (define_insn "br_false_uni"
945 [(set (pc) (if_then_else
946 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
947 UNSPEC_BR_UNIFIED) (const_int 0))
948 (label_ref (match_operand 1 "" "")) (pc)))]
950 "%J0\\tbra.uni\\t%l1;"
951 [(set_attr "predicable" "no")])
953 (define_expand "cbranch<mode>4"
955 (if_then_else (match_operator 0 "nvptx_comparison_operator"
956 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
957 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
958 (label_ref (match_operand 3 "" ""))
962 rtx t = nvptx_expand_compare (operands[0]);
964 operands[1] = XEXP (t, 0);
965 operands[2] = XEXP (t, 1);
968 (define_expand "cbranch<mode>4"
970 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
971 [(match_operand:SDFM 1 "nvptx_register_operand" "")
972 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
973 (label_ref (match_operand 3 "" ""))
977 rtx t = nvptx_expand_compare (operands[0]);
979 operands[1] = XEXP (t, 0);
980 operands[2] = XEXP (t, 1);
983 (define_expand "cbranchbi4"
985 (if_then_else (match_operator 0 "predicate_operator"
986 [(match_operand:BI 1 "nvptx_register_operand" "")
987 (match_operand:BI 2 "const0_operand" "")])
988 (label_ref (match_operand 3 "" ""))
993 ;; Conditional stores
995 (define_insn "setcc<mode>_from_bi"
996 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
997 (ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
1000 "%.\\tselp%t0\\t%0, 1, 0, %1;")
1002 (define_insn "*setcc<mode>_from_not_bi"
1003 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1004 (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
1007 "%.\\tselp%t0\\t%0, 0, 1, %1;")
1009 (define_insn "extendbi<mode>2"
1010 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1012 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1014 "%.\\tselp%t0\\t%0, -1, 0, %1;")
1016 (define_insn "zero_extendbi<mode>2"
1017 [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R")
1019 (match_operand:BI 1 "nvptx_register_operand" "R")))]
1021 "%.\\tselp%t0\\t%0, 1, 0, %1;")
1023 (define_insn "sel_true<mode>"
1024 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1026 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1027 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1028 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1030 "%.\\tselp%t0\\t%0, %2, %3, %1;")
1032 (define_insn "sel_true<mode>"
1033 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1035 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1036 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1037 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1039 "%.\\tselp%t0\\t%0, %2, %3, %1;")
1041 (define_insn "sel_false<mode>"
1042 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1044 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1045 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
1046 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
1048 "%.\\tselp%t0\\t%0, %3, %2, %1;")
1050 (define_insn "sel_false<mode>"
1051 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1053 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
1054 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1055 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1057 "%.\\tselp%t0\\t%0, %3, %2, %1;")
;; eqne iterates over the EQ and NE rtx codes.  Patterns that use it get
;; one instance per code, with <code> in the pattern name expanding to
;; "eq" or "ne".
1059 (define_code_iterator eqne [eq ne])
1061 ;; Split negation of a predicate into a conditional move.
1062 (define_insn_and_split "*selp<mode>_neg_<code>"
1063 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1064 (neg:HSDIM (eqne:HSDIM
1065 (match_operand:BI 1 "nvptx_register_operand" "R")
1072 (eqne (match_dup 1) (const_int 0))
1076 ;; Split bitwise not of a predicate into a conditional move.
1077 (define_insn_and_split "*selp<mode>_not_<code>"
1078 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
1079 (not:HSDIM (eqne:HSDIM
1080 (match_operand:BI 1 "nvptx_register_operand" "R")
1087 (eqne (match_dup 1) (const_int 0))
1091 (define_insn "*setcc_int<mode>"
1092 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1094 (match_operator:SI 1 "nvptx_comparison_operator"
1095 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1096 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
1098 "%.\\tset%t0%c1\\t%0, %2, %3;")
1100 (define_insn "*setcc_int<mode>"
1101 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1103 (match_operator:SI 1 "nvptx_float_comparison_operator"
1104 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1105 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
1107 "%.\\tset%t0%c1\\t%0, %2, %3;")
1109 (define_insn "setcc_float<mode>"
1110 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1111 (match_operator:SF 1 "nvptx_comparison_operator"
1112 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
1113 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
1115 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; Store the result of a floating-point comparison (SDFM-mode operands 2
;; and 3, float comparison operator 1) into SFmode register operand 0 with
;; one `set`.
1117 (define_insn "setcc_float<mode>"
1118 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1119 (match_operator:SF 1 "nvptx_float_comparison_operator"
1120 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
1121 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
1123 "%.\\tset%t0%c1\\t%0, %2, %3;")
;; cstore expander for the HSDIM integer modes.  The comparison (code taken
;; from operand 1, applied to operands 2 and 3) is rebuilt in BImode and
;; moved into a fresh BImode register; gen_setccsi_from_bi then produces the
;; SImode result in operand 0 from that predicate.
1125 (define_expand "cstore<mode>4"
1126 [(set (match_operand:SI 0 "nvptx_register_operand")
1127 (match_operator:SI 1 "nvptx_comparison_operator"
1128 [(match_operand:HSDIM 2 "nvptx_register_operand")
1129 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
1132 rtx reg = gen_reg_rtx (BImode);
1133 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1134 operands[2], operands[3]);
1135 emit_move_insn (reg, cmp);
1136 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; cstore expander for the SDFM floating-point modes.  Same scheme as the
;; integer version: rebuild the comparison in BImode, move it into a fresh
;; BImode register, then convert that predicate to the SImode result with
;; gen_setccsi_from_bi.
1140 (define_expand "cstore<mode>4"
1141 [(set (match_operand:SI 0 "nvptx_register_operand")
1142 (match_operator:SI 1 "nvptx_float_comparison_operator"
1143 [(match_operand:SDFM 2 "nvptx_register_operand")
1144 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
1147 rtx reg = gen_reg_rtx (BImode);
1148 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1149 operands[2], operands[3]);
1150 emit_move_insn (reg, cmp);
1151 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; cstore expander for HFmode operands.  The float comparison is rebuilt in
;; BImode, moved into a fresh BImode register, and converted to the SImode
;; result in operand 0 with gen_setccsi_from_bi.
1155 (define_expand "cstorehf4"
1156 [(set (match_operand:SI 0 "nvptx_register_operand")
1157 (match_operator:SI 1 "nvptx_float_comparison_operator"
1158 [(match_operand:HF 2 "nvptx_register_operand")
1159 (match_operand:HF 3 "nvptx_nonmemory_operand")]))]
1162 rtx reg = gen_reg_rtx (BImode);
1163 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
1164 operands[2], operands[3]);
1165 emit_move_insn (reg, cmp);
1166 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; Call with no return value.  Operand 0 is the callee address (mode
;; iterator P) inside a QImode MEM; the assembly text is produced by
;; nvptx_output_call_insn, which is passed NULL_RTX as the result.
1172 (define_insn "call_insn_<mode>"
1173 [(match_parallel 2 "call_operation"
1174 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
1175 (match_operand 1))])]
1178 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
;; Call whose result is stored in register operand 0.  Operand 1 is the
;; callee address (mode iterator P); the assembly text is produced by
;; nvptx_output_call_insn, given the result register and the callee.
1181 (define_insn "call_value_insn_<mode>"
1182 [(match_parallel 3 "call_operation"
1183 [(set (match_operand 0 "nvptx_register_operand" "=R")
1184 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
1185 (match_operand 2)))])]
1188 return nvptx_output_call_insn (insn, operands[0], operands[1]);
;; "call" expander: hands operand 0 to nvptx_expand_call, passing NULL_RTX
;; for the return value.
1191 (define_expand "call"
1192 [(match_operand 0 "" "")]
1195 nvptx_expand_call (NULL_RTX, operands[0]);
;; "call_value" expander: hands operand 0 (the return value) and operand 1
;; to nvptx_expand_call.
1199 (define_expand "call_value"
1200 [(match_operand 0 "" "")
1201 (match_operand 1 "" "")]
1204 nvptx_expand_call (operands[0], operands[1]);
1208 ;; Floating point arithmetic.
;; Floating-point addition for the SDFM modes; operand 2 may be a register
;; or a floating constant ("RF").  Emits `add`.
1210 (define_insn "add<mode>3"
1211 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1212 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1213 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1215 "%.\\tadd%t0\\t%0, %1, %2;")
;; Floating-point subtraction for the SDFM modes; both inputs must be
;; registers.  Emits `sub`.
1217 (define_insn "sub<mode>3"
1218 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1219 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1220 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
1222 "%.\\tsub%t0\\t%0, %1, %2;")
;; Floating-point multiplication for the SDFM modes; operand 2 may be a
;; register or a floating constant ("RF").  Emits `mul`.
1224 (define_insn "mul<mode>3"
1225 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1226 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1227 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1229 "%.\\tmul%t0\\t%0, %1, %2;")
;; Fused multiply-add for the SDFM modes: an FMA rtx over operands 1, 2
;; and 3.  Operands 2 and 3 may be registers or floating constants.  Emits
;; `fma`.
1231 (define_insn "fma<mode>4"
1232 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1233 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1234 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
1235 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
1237 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
;; Reciprocal for the SDFM modes.  The insn condition only accepts the
;; pattern when constant operand 2 is a CONST_DOUBLE identical to dconst1;
;; the output is `rcp` applied to register operand 1.
1239 (define_insn "*recip<mode>2"
1240 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1242 (match_operand:SDFM 2 "const_double_operand" "F")
1243 (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1244 "CONST_DOUBLE_P (operands[2])
1245 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
1246 "%.\\trcp%#%t0\\t%0, %1;")
;; Floating-point division for the SDFM modes; the divisor (operand 2) may
;; be a register or a floating constant.  Emits `div`.
1248 (define_insn "div<mode>3"
1249 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1250 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1251 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1253 "%.\\tdiv%#%t0\\t%0, %1, %2;")
;; copysign for the SDFM modes, expressed as an unspec of operands 1 and 2.
;; Note that the output template lists operand 2 before operand 1.
1255 (define_insn "copysign<mode>3"
1256 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1257 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
1258 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")]
1261 "%.\\tcopysign%t0\\t%0, %2, %1;")
;; Floating-point minimum (SMIN) for the SDFM modes.  Emits `min`.
1263 (define_insn "smin<mode>3"
1264 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1265 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1266 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1268 "%.\\tmin%t0\\t%0, %1, %2;")
;; Floating-point maximum (SMAX) for the SDFM modes.  Emits `max`.
1270 (define_insn "smax<mode>3"
1271 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1272 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
1273 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
1275 "%.\\tmax%t0\\t%0, %1, %2;")
;; Floating-point absolute value for the SDFM modes.  Emits `abs`.
1277 (define_insn "abs<mode>2"
1278 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1279 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1281 "%.\\tabs%t0\\t%0, %1;")
;; Floating-point negation for the SDFM modes.  Emits `neg`.
1283 (define_insn "neg<mode>2"
1284 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1285 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1287 "%.\\tneg%t0\\t%0, %1;")
;; Floating-point square root for the SDFM modes.  Emits `sqrt`.
1289 (define_insn "sqrt<mode>2"
1290 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1291 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1293 "%.\\tsqrt%#%t0\\t%0, %1;")
;; SFmode sincos expander, enabled only under
;; flag_unsafe_math_optimizations.  Two results are set from the single
;; input operand 2: operand 0, and operand 1 (via UNSPEC_SIN).  Before the
;; pattern is emitted, operand 2 is passed through make_safe_from against
;; operand 0.
1295 (define_expand "sincossf3"
1296 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1297 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
1299 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
1300 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1301 "flag_unsafe_math_optimizations"
1303 operands[2] = make_safe_from (operands[2], operands[0]);
;; SFmode sine, available only under flag_unsafe_math_optimizations.
;; Emits `sin.approx`.
1306 (define_insn "sinsf2"
1307 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1308 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1310 "flag_unsafe_math_optimizations"
1311 "%.\\tsin.approx%t0\\t%0, %1;")
;; SFmode cosine, available only under flag_unsafe_math_optimizations.
;; Emits `cos.approx`.
1313 (define_insn "cossf2"
1314 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1315 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1317 "flag_unsafe_math_optimizations"
1318 "%.\\tcos.approx%t0\\t%0, %1;")
;; SFmode base-2 logarithm, available only under
;; flag_unsafe_math_optimizations.  Emits `lg2.approx`.
1320 (define_insn "log2sf2"
1321 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1322 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1324 "flag_unsafe_math_optimizations"
1325 "%.\\tlg2.approx%t0\\t%0, %1;")
;; SFmode base-2 exponential, available only under
;; flag_unsafe_math_optimizations.  Emits `ex2.approx`.
1327 (define_insn "exp2sf2"
1328 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1329 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1331 "flag_unsafe_math_optimizations"
1332 "%.\\tex2.approx%t0\\t%0, %1;")
;; Set BImode predicate operand 0 from an infinity test of SDFM-mode
;; operand 1.  Emits `testp.infinite`.
1334 (define_insn "setcc_isinf<mode>"
1335 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
1336 (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1339 "%.\\ttestp.infinite%t1\\t%0, %1;")
;; isinf expander for the SDFM modes: computes the test into a fresh BImode
;; register with gen_setcc_isinf<mode>, then converts that predicate to the
;; SImode result in operand 0 with gen_setccsi_from_bi.
1341 (define_expand "isinf<mode>2"
1342 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1343 (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1347 rtx pred = gen_reg_rtx (BImode);
1348 emit_insn (gen_setcc_isinf<mode> (pred, operands[1]));
1349 emit_insn (gen_setccsi_from_bi (operands[0], pred));
1353 ;; HFmode floating point arithmetic.
;; HFmode addition of two registers.  Emits `add.f16`.
1355 (define_insn "addhf3"
1356 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1357 (plus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1358 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1360 "%.\\tadd.f16\\t%0, %1, %2;")
;; HFmode subtraction of two registers.  Emits `sub.f16`.
1362 (define_insn "subhf3"
1363 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1364 (minus:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1365 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1367 "%.\\tsub.f16\\t%0, %1, %2;")
;; HFmode multiplication of two registers.  Emits `mul.f16`.
1369 (define_insn "mulhf3"
1370 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1371 (mult:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1372 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1374 "%.\\tmul.f16\\t%0, %1, %2;")
;; HFmode fused multiply-add over operands 1, 2 and 3; operands 2 and 3 may
;; be registers or floating constants.  Emits `fma` with an `.f16` suffix.
1376 (define_insn "fmahf4"
1377 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1378 (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1379 (match_operand:HF 2 "nvptx_nonmemory_operand" "RF")
1380 (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))]
1382 "%.\\tfma%#.f16\\t%0, %1, %2, %3;")
;; HFmode negation, done as a 16-bit integer operation: `xor.b16` with
;; -32768, which flips only bit 15 of the 16-bit value.
1384 (define_insn "neghf2"
1385 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1386 (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1388 "%.\\txor.b16\\t%0, %1, -32768;")
;; HFmode absolute value, done as a 16-bit integer operation: `and.b16`
;; with 32767, which clears only bit 15 of the 16-bit value.
1390 (define_insn "abshf2"
1391 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1392 (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))]
1394 "%.\\tand.b16\\t%0, %1, 32767;")
;; HFmode base-2 exponential; requires both TARGET_SM75 and
;; flag_unsafe_math_optimizations.  Emits `ex2.approx.f16`.
1396 (define_insn "exp2hf2"
1397 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1398 (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
1400 "TARGET_SM75 && flag_unsafe_math_optimizations"
1401 "%.\\tex2.approx.f16\\t%0, %1;")
;; Hyperbolic tangent for the HSFM modes; requires both TARGET_SM75 and
;; flag_unsafe_math_optimizations.  Emits `tanh.approx`.
1403 (define_insn "tanh<mode>2"
1404 [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R")
1405 (unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")]
1407 "TARGET_SM75 && flag_unsafe_math_optimizations"
1408 "%.\\ttanh.approx%t0\\t%0, %1;")
1410 ;; HFmode floating point minimum and maximum.
;; HFmode minimum (SMIN) of two registers.  Emits `min.f16`.
1412 (define_insn "sminhf3"
1413 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1414 (smin:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1415 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1417 "%.\\tmin.f16\\t%0, %1, %2;")
;; HFmode maximum (SMAX) of two registers.  Emits `max.f16`.
1419 (define_insn "smaxhf3"
1420 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1421 (smax:HF (match_operand:HF 1 "nvptx_register_operand" "R")
1422 (match_operand:HF 2 "nvptx_register_operand" "R")))]
1424 "%.\\tmax.f16\\t%0, %1, %2;")
1426 ;; Conversions involving floating point
;; Extend an SFmode register to DFmode.  Emits `cvt`.
1428 (define_insn "extendsfdf2"
1429 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1430 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1432 "%.\\tcvt%t0%t1\\t%0, %1;")
;; Truncate a DFmode register to SFmode.  Emits `cvt`; unlike the extend
;; pattern, the template includes the `%#` output modifier.
1434 (define_insn "truncdfsf2"
1435 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1436 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1438 "%.\\tcvt%#%t0%t1\\t%0, %1;")
;; Convert an unsigned SImode register to an SDFM floating-point mode.
;; Emits `cvt` with a `.u` source-type prefix.
1440 (define_insn "floatunssi<mode>2"
1441 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1442 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1444 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
;; Convert a signed SImode register to an SDFM floating-point mode.
;; Emits `cvt` with a `.s` source-type prefix.
1446 (define_insn "floatsi<mode>2"
1447 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1448 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1450 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
;; Convert an unsigned DImode register to an SDFM floating-point mode.
;; Emits `cvt` with a `.u` source-type prefix.
1452 (define_insn "floatunsdi<mode>2"
1453 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1454 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1456 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
;; Convert a signed DImode register to an SDFM floating-point mode.
;; Emits `cvt` with a `.s` source-type prefix.
1458 (define_insn "floatdi<mode>2"
1459 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1460 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1462 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
;; Convert an SDFM floating-point register to unsigned SImode.  Emits
;; `cvt.rzi` with a `.u` destination-type prefix.
1464 (define_insn "fixuns_trunc<mode>si2"
1465 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1466 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1468 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; Convert an SDFM floating-point register to signed SImode.  Emits
;; `cvt.rzi` with a `.s` destination-type prefix.
1470 (define_insn "fix_trunc<mode>si2"
1471 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1472 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1474 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Convert an SDFM floating-point register to unsigned DImode.  Emits
;; `cvt.rzi` with a `.u` destination-type prefix.
1476 (define_insn "fixuns_trunc<mode>di2"
1477 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1478 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1480 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
;; Convert an SDFM floating-point register to signed DImode.  Emits
;; `cvt.rzi` with a `.s` destination-type prefix.
1482 (define_insn "fix_trunc<mode>di2"
1483 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1484 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1486 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; Int iterator over the four round-to-integral unspecs, plus two
;; attributes keyed by it: fpint_name supplies the pattern-name stem
;; (floor, btrunc, ceil, nearbyint) and fpint_roundingmode supplies the
;; rounding suffix placed after `cvt` in the output template.
1488 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1489 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
1490 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1491 (UNSPEC_FPINT_BTRUNC "btrunc")
1492 (UNSPEC_FPINT_CEIL "ceil")
1493 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
1494 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1495 (UNSPEC_FPINT_BTRUNC ".rzi")
1496 (UNSPEC_FPINT_CEIL ".rpi")
1497 (UNSPEC_FPINT_NEARBYINT "%#i")])
;; Round-to-integral patterns within the same floating-point mode, one per
;; FPINT unspec and SDFM mode.  Emits `cvt` followed by the rounding suffix
;; from fpint_roundingmode.
1499 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1500 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1501 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1504 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
;; Int iterator over the floor and ceil unspecs, plus two attributes keyed
;; by it: fpint2_name supplies the pattern-name stem (lfloor, lceil) and
;; fpint2_roundingmode supplies the rounding suffix used after `cvt`.
1506 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
1507 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1508 (UNSPEC_FPINT_CEIL "lceil")])
1509 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1510 (UNSPEC_FPINT_CEIL ".rpi")])
;; lfloor/lceil patterns: convert an SDFM floating-point register to an
;; SDIM integer register.  Emits `cvt` with the rounding suffix from
;; fpint2_roundingmode and a `.s` destination-type prefix.
1512 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1513 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1514 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1517 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
;; Extend an HFmode register to an SDFM mode.  Emits `cvt`.
1519 (define_insn "extendhf<mode>2"
1520 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1521 (float_extend:SDFM (match_operand:HF 1 "nvptx_register_operand" "R")))]
1523 "%.\\tcvt%t0%t1\\t%0, %1;")
;; Truncate an SDFM-mode register to HFmode.  Emits `cvt`; the template
;; includes the `%#` output modifier.
1525 (define_insn "trunc<mode>hf2"
1526 [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
1527 (float_truncate:HF (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1529 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1531 ;; Vector operations
;; Write scalar operand 1 into the `.x` component of vector operand 0
;; (VECIM modes), using a `mov`.
1533 (define_insn "*vec_set<mode>_0"
1534 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1536 (vec_duplicate:VECIM
1537 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1541 "%.\\tmov%t1\\t%0.x, %1;")
;; Write scalar operand 1 into the `.y` component of vector operand 0
;; (VECIM modes), using a `mov`.
1543 (define_insn "*vec_set<mode>_1"
1544 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1546 (vec_duplicate:VECIM
1547 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1551 "%.\\tmov%t1\\t%0.y, %1;")
;; Write scalar operand 1 into the `.z` component of vector operand 0
;; (VECIM modes), using a `mov`.
1553 (define_insn "*vec_set<mode>_2"
1554 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1556 (vec_duplicate:VECIM
1557 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1561 "%.\\tmov%t1\\t%0.z, %1;")
;; Write scalar operand 1 into the `.w` component of vector operand 0
;; (VECIM modes), using a `mov`.
1563 (define_insn "*vec_set<mode>_3"
1564 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1566 (vec_duplicate:VECIM
1567 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1571 "%.\\tmov%t1\\t%0.w, %1;")
;; vec_set expander for the VECIM modes.  It builds a VEC_MERGE of
;; VEC_DUPLICATE (operand 1) with the existing vector operand 0, using the
;; single-bit mask 1 << index (index = constant operand 2), and emits that
;; as a SET of operand 0.
1573 (define_expand "vec_set<mode>"
1574 [(match_operand:VECIM 0 "nvptx_register_operand")
1575 (match_operand:<VECELEM> 1 "nvptx_register_operand")
1576 (match_operand:SI 2 "nvptx_vector_index_operand")]
1579 enum machine_mode mode = GET_MODE (operands[0]);
1580 int mask = 1 << INTVAL (operands[2]);
1581 rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
1582 tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
1583 emit_insn (gen_rtx_SET (operands[0], tmp));
;; Extract one element of vector operand 1 (VECIM modes) into scalar
;; operand 0.  The constant index in operand 2 selects one of four `mov`
;; templates, reading the .x, .y, .z or .w component respectively.
1587 (define_insn "vec_extract<mode><Vecelem>"
1588 [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
1589 (vec_select:<VECELEM>
1590 (match_operand:VECIM 1 "nvptx_register_operand" "R")
1591 (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
1594 static const char *const asms[4] = {
1595 "%.\\tmov%t0\\t%0, %1.x;",
1596 "%.\\tmov%t0\\t%0, %1.y;",
1597 "%.\\tmov%t0\\t%0, %1.z;",
1598 "%.\\tmov%t0\\t%0, %1.w;"
1600 return asms[INTVAL (operands[2])];
;; The output text declares two .u32 registers, nop_src and nop_dst, and
;; moves the former into the latter.
1615 (define_insn "fake_nop"
1619 .reg .u32 %%nop_src;
1620 .reg .u32 %%nop_dst;
1621 mov.u32 %%nop_dst, %%nop_src;
;; Function return; the assembly text comes from nvptx_output_return.  The
;; insn is marked as not predicable.
1624 (define_insn "return"
1628 return nvptx_output_return ();
1630 [(set_attr "predicable" "no")])
;; Epilogue expander.  With TARGET_SOFT_STACK it first emits a
;; set_softstack of the Pmode register SOFTSTACK_PREV_REGNUM; in all cases
;; it then emits the return jump insn.
1632 (define_expand "epilogue"
1633 [(clobber (const_int 0))]
1636 if (TARGET_SOFT_STACK)
1637 emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1638 SOFTSTACK_PREV_REGNUM)));
1639 emit_jump_insn (gen_return ());
;; Nonlocal goto is not supported: the expander reports this with sorry ()
;; and emits a nop in place of the operation.
1643 (define_expand "nonlocal_goto"
1644 [(match_operand 0 "" "")
1645 (match_operand 1 "" "")
1646 (match_operand 2 "" "")
1647 (match_operand 3 "" "")]
1650 sorry ("target cannot support nonlocal goto");
1651 emit_insn (gen_nop ());
;; Receiver side of a nonlocal goto; likewise unsupported and reported with
;; sorry ().
1655 (define_expand "nonlocal_goto_receiver"
1659 sorry ("target cannot support nonlocal goto");
;; Dynamic stack allocation; operand 0 receives the address and operand 1
;; is the size.  Three cases:
;;  - without the soft stack, and when the remaining conditions of the
;;    first test hold, emit the nvptx_alloca insn;
;;  - otherwise, without the soft stack, report that alloca is unsupported
;;    via sorry () and emit a nop;
;;  - with the soft stack, subtract operand 1 from the stack pointer,
;;    publish the new value with set_softstack, and copy
;;    virtual_stack_dynamic_rtx into operand 0.
1662 (define_expand "allocate_stack"
1663 [(match_operand 0 "nvptx_register_operand")
1664 (match_operand 1 "nvptx_register_operand")]
1667 if (!TARGET_SOFT_STACK
1670 emit_insn (gen_nvptx_alloca (Pmode, operands[0], operands[1]));
1671 else if (!TARGET_SOFT_STACK)
1673 sorry ("target cannot support alloca");
1674 emit_insn (gen_nop ());
1676 else if (TARGET_SOFT_STACK)
1678 emit_move_insn (stack_pointer_rtx,
1679 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1680 emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1681 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
;; alloca insn for the pointer modes P; operand 1 is the size and operand 0
;; receives the resulting address.  The output is a braced sequence that
;; declares a temporary register named after operand 0 with a "_local"
;; suffix, performs `alloca` into it, and then `cvta.local` from it into
;; operand 0.  The insn is marked as not predicable.
1688 (define_insn "@nvptx_alloca_<mode>"
1689 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1690 (unspec:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")]
1695 /* Convert the address from '.local' state space to generic. That way,
1696 we don't have to use 'st.local', 'ld.local', and can easily pass the
1697 address to other "generic functions".
1698 TODO 'gcc.target/nvptx/alloca-5.c' */
1699 output_asm_insn ("{", NULL);
1700 output_asm_insn ("\\t.reg%t0\\t%0_local;", operands);
1701 output_asm_insn ("\\talloca%u0\\t%0_local, %1;", operands);
1702 output_asm_insn ("\\tcvta.local%u0\\t%0, %0_local;", operands);
1703 output_asm_insn ("}", NULL);
1706 [(set_attr "predicable" "no")])
;; UNSPEC_SET_SOFTSTACK on pointer-mode register operand 0.  The assembly
;; text is produced by nvptx_output_set_softstack from that register's
;; number.
1708 (define_insn "@set_softstack_<mode>"
1709 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1710 UNSPEC_SET_SOFTSTACK)]
1713 return nvptx_output_set_softstack (REGNO (operands[0]));
;; save_stack_block expander, enabled only when the soft stack is not in
;; use.  Two outcomes are visible in the body: emitting the nvptx_stacksave
;; insn (after asserting operand 0 is a REG), or storing the dummy constant
;; 0xdeadbeef into operand 0 as explained in the comment below.
1716 (define_expand "save_stack_block"
1717 [(match_operand 0 "register_operand" "")
1718 (match_operand 1 "register_operand" "")]
1719 "!TARGET_SOFT_STACK"
1724 gcc_checking_assert (REG_P (operands[0]));
1725 emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1]));
1729 /* The concept of a '%stack' pointer doesn't apply like this.
1730 GCC however occasionally synthesizes '__builtin_stack_save ()',
1731 '__builtin_stack_restore ()', and isn't able to optimize them all
1732 away. Just submit a dummy -- user code shouldn't be able to observe
1734 emit_move_insn (operands[0], GEN_INT (0xdeadbeef));
;; Stack-save insn for the pointer modes P.  The output template is
;; `stacksave` writing operand 0; operand 1 does not appear in the
;; template.
1739 (define_insn "@nvptx_stacksave_<mode>"
1740 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1741 (unspec:P [(match_operand:P 1 "register_operand" "R")]
1745 "%.\\tstacksave%u0\\t%0;")
;; restore_stack_block expander.  Three cases:
;;  - without the soft stack, and when the remaining conditions of the
;;    first test hold, force operand 1 into a Pmode register and emit the
;;    nvptx_stackrestore insn;
;;  - otherwise, without the soft stack, emit nothing;
;;  - with the soft stack, copy operand 1 to operand 0 and publish it with
;;    set_softstack.
1747 (define_expand "restore_stack_block"
1748 [(match_operand 0 "register_operand" "")
1749 (match_operand 1 "register_operand" "")]
1752 if (!TARGET_SOFT_STACK
1756 operands[1] = force_reg (Pmode, operands[1]);
1757 emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1]));
1759 else if (!TARGET_SOFT_STACK)
1760 ; /* See 'save_stack_block'. */
1761 else if (TARGET_SOFT_STACK)
1763 emit_move_insn (operands[0], operands[1]);
1764 emit_insn (gen_set_softstack (Pmode, operands[0]));
;; Stack-restore insn for the pointer modes P (UNSPEC_STACKRESTORE).  The
;; output template is `stackrestore` taking operand 1; operand 0 does not
;; appear in the template.
1771 (define_insn "@nvptx_stackrestore_<mode>"
1772 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1773 (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
1774 UNSPEC_STACKRESTORE))]
1777 "%.\\tstackrestore%u1\\t%1;")
;; save_stack_function expander, enabled only when the soft stack is not in
;; use.  The visible body only asserts, via gcc_checking_assert, that
;; operands[0] is null.
1779 (define_expand "save_stack_function"
1780 [(match_operand 0 "register_operand" "")
1781 (match_operand 1 "register_operand" "")]
1782 "!TARGET_SOFT_STACK"
1784 /* See 'STACK_SAVEAREA_MODE'. */
1785 gcc_checking_assert (operands[0] == 0);
;; restore_stack_function expander.  Without the soft stack it asserts, via
;; gcc_checking_assert, that operands[1] is null.
1789 (define_expand "restore_stack_function"
1790 [(match_operand 0 "register_operand" "")
1791 (match_operand 1 "register_operand" "")]
1794 if (!TARGET_SOFT_STACK)
1795 /* See 'STACK_SAVEAREA_MODE'. */
1796 gcc_checking_assert (operands[1] == 0);
1801 [(trap_if (const_int 1) (const_int 0))]
;; Conditional trap on an NE test of BImode predicate operand 0.  The
;; template emits `trap` followed by `exit`, each prefixed with the `%j0`
;; output modifier.  Not predicable.
1805 (define_insn "trap_if_true"
1806 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1810 "%j0 trap; %j0 exit;"
1811 [(set_attr "predicable" "no")])
;; Conditional trap on an EQ test of BImode predicate operand 0.  The
;; template emits `trap` followed by `exit`, each prefixed with the `%J0`
;; output modifier.  Not predicable.
1813 (define_insn "trap_if_false"
1814 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1818 "%J0 trap; %J0 exit;"
1819 [(set_attr "predicable" "no")])
;; Conditional-trap expander for the SDIM modes.  The comparison in operand
;; 0 is expanded with nvptx_expand_compare, and the result is passed to
;; trap_if_true.
1821 (define_expand "ctrap<mode>4"
1822 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1823 [(match_operand:SDIM 1 "nvptx_register_operand")
1824 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1825 (match_operand 3 "const0_operand"))]
1828 rtx t = nvptx_expand_compare (operands[0]);
1829 emit_insn (gen_trap_if_true (t));
;; Read the size of an OpenACC dimension into SImode operand 0.  Constant
;; operand 1 indexes the template table: gang reads %nctaid.x, worker reads
;; %ntid.y, vector reads %ntid.x.
1833 (define_insn "oacc_dim_size"
1834 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1835 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1839 static const char *const asms[] =
1840 { /* Must match oacc_loop_levels ordering. */
1841 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1842 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1843 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1845 return asms[INTVAL (operands[1])];
;; Read the position within an OpenACC dimension into SImode operand 0
;; (an unspec_volatile).  Constant operand 1 indexes the template table:
;; gang reads %ctaid.x, worker reads %tid.y, vector reads %tid.x.
1848 (define_insn "oacc_dim_pos"
1849 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1850 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1854 static const char *const asms[] =
1855 { /* Must match oacc_loop_levels ordering. */
1856 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1857 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1858 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1860 return asms[INTVAL (operands[1])];
;; Volatile unspec taking a constant integer operand; not predicable.
1863 (define_insn "nvptx_fork"
1864 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1868 [(set_attr "predicable" "no")])
;; Volatile unspec taking a constant integer operand; not predicable.
1870 (define_insn "nvptx_forked"
1871 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1875 [(set_attr "predicable" "no")])
;; Volatile unspec taking a constant integer operand; not predicable.
1877 (define_insn "nvptx_joining"
1878 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1882 [(set_attr "predicable" "no")])
;; Volatile unspec taking a constant integer operand; not predicable.
1884 (define_insn "nvptx_join"
1885 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1889 [(set_attr "predicable" "no")])
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; then nvptx_expand_oacc_fork is called with
;; the integer value of constant operand 2.
1891 (define_expand "oacc_fork"
1892 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1893 (match_operand:SI 1 "general_operand" ""))
1894 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1898 if (operands[0] != const0_rtx)
1899 emit_move_insn (operands[0], operands[1]);
1900 nvptx_expand_oacc_fork (INTVAL (operands[2]));
;; OpenACC join expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; then nvptx_expand_oacc_join is called with
;; the integer value of constant operand 2.
1904 (define_expand "oacc_join"
1905 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1906 (match_operand:SI 1 "general_operand" ""))
1907 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1911 if (operands[0] != const0_rtx)
1912 emit_move_insn (operands[0], operands[1]);
1913 nvptx_expand_oacc_join (INTVAL (operands[2]));
1917 ;; only 32-bit shuffles exist.
;; Shuffle for the BITS modes: operand 1 is the value, operand 2 the lane
;; argument, and constant operand 3 the shuffle kind (printed with `%S3`).
;; Two templates are visible: a `shfl.sync` form that adds the mask
;; 0xffffffff, and a plain `shfl` form.  Both pass 31 as the fourth
;; instruction operand.
1918 (define_insn "nvptx_shuffle<mode>"
1919 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1921 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1922 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1923 (match_operand:SI 3 "const_int_operand" "n")]
1928 return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;";
1930 return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;";
;; Ballot vote: SImode operand 0 is set from BImode predicate operand 1
;; (UNSPEC_VOTE_BALLOT).  Two templates are visible: a `vote.sync.ballot`
;; form that adds the mask 0xffffffff, and a plain `vote.ballot` form.
1933 (define_insn "nvptx_vote_ballot"
1934 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1935 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1936 UNSPEC_VOTE_BALLOT))]
1940 return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;";
1942 return "%.\\tvote.ballot.b32\\t%0, %1;";
1945 ;; Patterns for OpenMP SIMD-via-SIMT lowering
;; SIMT region entry insn for the pointer modes P (UNSPECV_SIMT_ENTER).
;; The assembly text is produced by nvptx_output_simt_enter from operands
;; 0, 1 and 2.
1947 (define_insn "@omp_simt_enter_<mode>"
1948 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1949 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1950 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1951 UNSPECV_SIMT_ENTER))]
1954 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
;; SIMT region entry expander.  Before emitting the Pmode omp_simt_enter
;; insn it updates per-function state in cfun->machine:
;;  - simt_stack_size becomes HOST_WIDE_INT_M1U if operand 1 is not a
;;    constant, else the maximum of its current value and operand 1;
;;  - simt_stack_align becomes the maximum of its current value and
;;    operand 2;
;;  - has_simtreg is set.
1957 (define_expand "omp_simt_enter"
1958 [(match_operand 0 "nvptx_register_operand" "=R")
1959 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1960 (match_operand 2 "const_int_operand" "n")]
1963 if (!CONST_INT_P (operands[1]))
1964 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1966 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1967 cfun->machine->simt_stack_size);
1968 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1969 cfun->machine->simt_stack_align);
1970 cfun->machine->has_simtreg = true;
1971 emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
;; SIMT region exit expander: emits the Pmode omp_simt_exit insn on operand
;; 0, followed by either nvptx_warpsync or nvptx_uniform_warp_check.
1975 (define_expand "omp_simt_exit"
1976 [(match_operand 0 "nvptx_register_operand" "R")]
1979 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1981 emit_insn (gen_nvptx_warpsync ());
1983 emit_insn (gen_nvptx_uniform_warp_check ());
;; SIMT region exit insn for the pointer modes P (a volatile unspec).  The
;; assembly text is produced by nvptx_output_simt_exit from operand 0.
1987 (define_insn "@omp_simt_exit_<mode>"
1988 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1992 return nvptx_output_simt_exit (operands[0]);
1995 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The value is read from the %laneid special register with a `mov.u32`.
1996 (define_insn "omp_simt_lane"
1997 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1998 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
2000 "%.\\tmov.u32\\t%0, %%laneid;")
2002 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
2003 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The barrier is the nvptx_nounroll insn, emitted after the move.
2004 (define_expand "omp_simt_ordered"
2005 [(match_operand:SI 0 "nvptx_register_operand" "=R")
2006 (match_operand:SI 1 "nvptx_register_operand" "R")]
2009 emit_move_insn (operands[0], operands[1]);
2010 emit_insn (gen_nvptx_nounroll ());
2014 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; Operands 0 and 1 may be plain registers or register pairs accepted by
;; nvptx_register_or_complex_di_df_register_operand; the instruction
;; sequence is produced by nvptx_gen_shuffle.
2016 (define_expand "omp_simt_xchg_bfly"
2017 [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
2018 (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
2019 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
2022 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
2027 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
2028 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; Operands 0 and 1 may be plain registers or register pairs accepted by
;; nvptx_register_or_complex_di_df_register_operand; the instruction
;; sequence is produced by nvptx_gen_shuffle.
2029 (define_expand "omp_simt_xchg_idx"
2030 [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R")
2031 (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R")
2032 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
2035 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
2040 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
2041 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Done by computing the predicate (operand 1 != 0) into a fresh BImode
;; register and storing the nvptx_vote_ballot result in operand 0.
2042 (define_expand "omp_simt_vote_any"
2043 [(match_operand:SI 0 "nvptx_register_operand" "=R")
2044 (match_operand:SI 1 "nvptx_register_operand" "R")]
2047 rtx pred = gen_reg_rtx (BImode);
2048 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
2049 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
2053 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
2054 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Done by computing the predicate (operand 1 != 0), taking its ballot into
;; a temporary SImode register, and applying ctzsi2 to that ballot.
2055 (define_expand "omp_simt_last_lane"
2056 [(match_operand:SI 0 "nvptx_register_operand" "=R")
2057 (match_operand:SI 1 "nvptx_register_operand" "R")]
2060 rtx pred = gen_reg_rtx (BImode);
2061 rtx tmp = gen_reg_rtx (SImode);
2062 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
2063 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
2064 emit_insn (gen_ctzsi2 (operands[0], tmp));
2068 ;; extract parts of a 64 bit object into 2 32-bit ints
;; Operand 2 (BITD modes) is the 64-bit source; operands 0 and 1 receive
;; parts 0 and 1 (UNSPEC_BIT_CONV).  A single `mov.b64` writes the register
;; pair {%0,%1}.
2069 (define_insn "unpack<mode>si2"
2070 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2071 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
2072 (const_int 0)] UNSPEC_BIT_CONV))
2073 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
2074 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
2076 "%.\\tmov.b64\\t{%0,%1}, %2;")
2078 ;; pack 2 32-bit ints into a 64 bit object
;; SImode operands 1 and 2 are combined into operand 0 (BITD modes) with a
;; single `mov.b64` reading the register pair {%1,%2}.
2079 (define_insn "packsi<mode>2"
2080 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
2081 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
2082 (match_operand:SI 2 "nvptx_register_operand" "R")]
2085 "%.\\tmov.b64\\t%0, {%1,%2};")
;; Compare-and-swap expander for the SDIM modes.  If nvptx_mem_local_p
;; holds for the memory operand, the "_1_local" variant is emitted;
;; otherwise the "_1" variant.  The boolean success output (operand 0) is
;; then derived by comparing the returned old value (operand 1) with the
;; expected value (operand 3) into a BImode register, and selecting 1 or 0
;; with sel_truesi.
2089 (define_expand "atomic_compare_and_swap<mode>"
2090 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
2091 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
2092 (match_operand:SDIM 2 "memory_operand") ;; memory
2093 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
2094 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
2095 (match_operand:SI 5 "const_int_operand") ;; is_weak
2096 (match_operand:SI 6 "const_int_operand") ;; success model
2097 (match_operand:SI 7 "const_int_operand")] ;; failure model
2100 if (nvptx_mem_local_p (operands[2]))
2101 emit_insn (gen_atomic_compare_and_swap<mode>_1_local
2102 (operands[1], operands[2], operands[3], operands[4],
2105 emit_insn (gen_atomic_compare_and_swap<mode>_1
2106 (operands[1], operands[2], operands[3], operands[4],
2109 rtx cond = gen_reg_rtx (BImode);
2110 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
2111 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
;; Compare-and-swap variant selected by the expander when
;; nvptx_mem_local_p holds for the memory operand.  It is emitted as a
;; braced sequence of ordinary instructions: load the current value into a
;; temporary, compare it with operand 2 using `setp.eq`, store operand 3
;; back under that predicate, and finally copy the loaded value into
;; operand 0.  Not predicable.
2115 (define_insn "atomic_compare_and_swap<mode>_1_local"
2116 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2117 (unspec_volatile:SDIM
2118 [(match_operand:SDIM 1 "memory_operand" "+m")
2119 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
2120 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
2121 (match_operand:SI 4 "const_int_operand")]
2124 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
2127 output_asm_insn ("{", NULL);
2128 output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
2129 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2130 output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2131 output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;",
2133 output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands);
2134 output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2135 output_asm_insn ("}", NULL);
2138 [(set_attr "predicable" "no")])
;; General compare-and-swap insn for the SDIM modes.  The `atom ... cas`
;; template is handed to nvptx_output_atomic_insn together with the
;; operand indices 1 (the memory operand) and 4 (the const_int operand).
;; Carries the "atomic" attribute.
2140 (define_insn "atomic_compare_and_swap<mode>_1"
2141 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2142 (unspec_volatile:SDIM
2143 [(match_operand:SDIM 1 "memory_operand" "+m")
2144 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
2145 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
2146 (match_operand:SI 4 "const_int_operand")]
2149 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
2153 = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;";
2154 return nvptx_output_atomic_insn (t, operands, 1, 4);
2156 [(set_attr "atomic" "true")])
;; Atomic exchange for the SDIM modes.  When nvptx_mem_local_p holds for
;; the memory operand, a braced load / store / move sequence is emitted
;; through a temporary register; otherwise the `atom ... exch` template is
;; handed to nvptx_output_atomic_insn with operand indices 1 (memory) and 3
;; (model).  Carries the "atomic" attribute.
2158 (define_insn "atomic_exchange<mode>"
2159 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
2160 (unspec_volatile:SDIM
2161 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
2162 (match_operand:SI 3 "const_int_operand")] ;; model
2165 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
2168 if (nvptx_mem_local_p (operands[1]))
2170 output_asm_insn ("{", NULL);
2171 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2172 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2173 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands);
2174 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2175 output_asm_insn ("}", NULL);
2179 = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;";
2180 return nvptx_output_atomic_insn (t, operands, 1, 3);
2182 [(set_attr "atomic" "true")])
;; Atomic store expander for the SDIM modes.  Paths visible in the body:
;;  - if the decomposed address has a register base for which
;;    REGNO_PTR_FRAME_P holds, emit a plain move;
;;  - emit the nvptx_atomic_store_sm70 insn;
;;  - if nvptx_mem_maybe_shared_p does not hold for the memory operand,
;;    fall back to the generic expand_atomic_store handling;
;;  - otherwise emit the nvptx_atomic_store insn.
2184 (define_expand "atomic_store<mode>"
2185 [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory
2186 (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2187 (match_operand:SI 2 "const_int_operand")] ;; model
2190 struct address_info info;
2191 decompose_mem_address (&info, operands[0]);
2192 if (info.base != NULL && REG_P (*info.base)
2193 && REGNO_PTR_FRAME_P (REGNO (*info.base)))
2195 emit_insn (gen_mov<mode> (operands[0], operands[1]));
2201 emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1],
2206 bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
2207 if (!maybe_shared_p)
2208 /* Fall back to expand_atomic_store. */
2211 emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1],
2216 (define_insn "nvptx_atomic_store_sm70<mode>"
2217 [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
2218 (unspec_volatile:SDIM
2219 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2220 (match_operand:SI 2 "const_int_operand")] ;; model
2225 = "%.\tst%A0.b%T0\t%0, %1;";
2226 return nvptx_output_atomic_insn (t, operands, 0, 2);
2228 [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn.
2230 (define_insn "nvptx_atomic_store<mode>"
;; Body of nvptx_atomic_store<mode>: the store is printed as "atom.exch"
;; with "_" in the destination position, i.e. the exchanged-out value is
;; not kept.  Printed through nvptx_output_atomic_insn with operand indices
;; 0 (memory) and 2 (model).
2231 [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory
2232 (unspec_volatile:SDIM
2233 [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
2234 (match_operand:SI 2 "const_int_operand")] ;; model
2239 = "%.\tatom%A0.exch.b%T0\t_, %0, %1;";
2240 return nvptx_output_atomic_insn (t, operands, 0, 2);
2242 [(set_attr "atomic" "true")])
;; Atomic fetch-and-add for the SDIM modes.  Memory operand 1 is updated
;; with (operand 1 + operand 2); register operand 0 is also set by the
;; pattern; operand 3 is the model.
;; When nvptx_mem_local_p holds for the memory operand, the insn is printed
;; as a scoped ld / add / st / mov sequence using the scratch registers
;; %val (value loaded, copied to operand 0) and %update (value stored).
;; The "atom.add" template is printed through nvptx_output_atomic_insn with
;; operand indices 1 (memory) and 3 (model).
2244 (define_insn "atomic_fetch_add<mode>"
2245 [(set (match_operand:SDIM 1 "memory_operand" "+m")
2246 (unspec_volatile:SDIM
2247 [(plus:SDIM (match_dup 1)
2248 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
2249 (match_operand:SI 3 "const_int_operand")] ;; model
2251 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2255 if (nvptx_mem_local_p (operands[1]))
2257 output_asm_insn ("{", NULL);
2258 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2259 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
2260 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2261 output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
2263 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2264 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2265 output_asm_insn ("}", NULL);
2269 = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
2270 return nvptx_output_atomic_insn (t, operands, 1, 3);
2272 [(set_attr "atomic" "true")])
;; Atomic fetch-and-add for SFmode.  Mirrors the integer fetch-and-add
;; pattern, except that operand 2 uses the "RF" constraint instead of "Ri".
;; When nvptx_mem_local_p holds for the memory operand, a scoped
;; ld / add / st / mov sequence through the scratch registers %val and
;; %update is printed.  The "atom.add" template is printed through
;; nvptx_output_atomic_insn with operand indices 1 (memory) and 3 (model).
2274 (define_insn "atomic_fetch_addsf"
2275 [(set (match_operand:SF 1 "memory_operand" "+m")
2277 [(plus:SF (match_dup 1)
2278 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
2279 (match_operand:SI 3 "const_int_operand")] ;; model
2281 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
2285 if (nvptx_mem_local_p (operands[1]))
2287 output_asm_insn ("{", NULL);
2288 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
2289 output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
2290 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2291 output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
2293 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2294 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2295 output_asm_insn ("}", NULL);
2299 = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
2300 return nvptx_output_atomic_insn (t, operands, 1, 3);
2302 [(set_attr "atomic" "true")])
;; Atomic fetch-and-<logic> for the SDIM modes, one insn per code in the
;; any_logic iterator.  Enabled for SImode unconditionally and for the
;; other SDIM modes only when TARGET_SM35 holds.
;; When nvptx_mem_local_p holds for the memory operand, a scoped
;; ld / <logic> / st / mov sequence is printed; the scratch registers %val
;; and %update are declared with the untyped ".b" register type, matching
;; the ".b" suffix of the logic instruction.  The "atom.<logic>" template
;; is printed through nvptx_output_atomic_insn with operand indices
;; 1 (memory) and 3 (model).
2304 (define_insn "atomic_fetch_<logic><mode>"
2305 [(set (match_operand:SDIM 1 "memory_operand" "+m")
2306 (unspec_volatile:SDIM
2307 [(any_logic:SDIM (match_dup 1)
2308 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
2309 (match_operand:SI 3 "const_int_operand")] ;; model
2311 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
2313 "<MODE>mode == SImode || TARGET_SM35"
2315 if (nvptx_mem_local_p (operands[1]))
2317 output_asm_insn ("{", NULL);
2318 output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
2319 output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands);
2320 output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
2321 output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;",
2323 output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
2324 output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
2325 output_asm_insn ("}", NULL);
2329 = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;";
2330 return nvptx_output_atomic_insn (t, operands, 1, 3);
2333 [(set_attr "atomic" "true")])
;; Expander for atomic test-and-set on a QImode memory operand.  No insn
;; is used; instead a call to the library function
;; "__atomic_test_and_set_1" is emitted, passing the memory address
;; (converted to ptr_mode) and the model, with the SImode result going to
;; operand 0.
2335 (define_expand "atomic_test_and_set"
2336 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
2337 (match_operand:QI 1 "memory_operand") ;; memory
2338 (match_operand:SI 2 "const_int_operand")] ;; model
2343 libfunc = init_one_libfunc ("__atomic_test_and_set_1");
2344 addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));
2345 emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode,
2347 operands[2], SImode);
;; Barrier synchronization.  Operand 0 is printed as the first barrier
;; argument.  If constant operand 1 is zero, the one-argument form is
;; printed ("barrier.sync.aligned" with TARGET_PTX_6_0, otherwise
;; "bar.sync"); if it is nonzero, operand 1 is printed as a second argument
;; ("barrier.sync" with TARGET_PTX_6_0, otherwise "bar.sync").
;; The insn is not predicable.
2351 (define_insn "nvptx_barsync"
2352 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
2353 (match_operand:SI 1 "const_int_operand")]
2357 if (INTVAL (operands[1]) == 0)
2358 return (TARGET_PTX_6_0
2359 ? "\\tbarrier.sync.aligned\\t%0;"
2360 : "\\tbar.sync\\t%0;");
2362 return (TARGET_PTX_6_0
2363 ? "\\tbarrier.sync\\t%0, %1;"
2364 : "\\tbar.sync\\t%0, %1;");
2366 [(set_attr "predicable" "no")])
;; Warp-level synchronization: prints "bar.warp.sync" with the constant
;; mask 0xffffffff (all 32 bits set).
2368 (define_insn "nvptx_warpsync"
2369 [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
2371 "%.\\tbar.warp.sync\\t0xffffffff;")
;; Iterator over the barrier-reduction unspecs, with per-unspec attributes:
;;   barred_op       the operation name used in the insn name and mnemonic,
;;   barred_mode     the mode of the result (BI for and/or, SI for popc),
;;   barred_ptxtype  the PTX type suffix (pred for and/or, u32 for popc).
2373 (define_int_iterator BARRED
2376 UNSPECV_BARRED_POPC])
2377 (define_int_attr barred_op
2378 [(UNSPECV_BARRED_AND "and")
2379 (UNSPECV_BARRED_OR "or")
2380 (UNSPECV_BARRED_POPC "popc")])
2381 (define_int_attr barred_mode
2382 [(UNSPECV_BARRED_AND "BI")
2383 (UNSPECV_BARRED_OR "BI")
2384 (UNSPECV_BARRED_POPC "SI")])
2385 (define_int_attr barred_ptxtype
2386 [(UNSPECV_BARRED_AND "pred")
2387 (UNSPECV_BARRED_OR "pred")
2388 (UNSPECV_BARRED_POPC "u32")])
;; Barrier with reduction ("bar.red"), one insn per BARRED unspec.
;; Operand 0 receives the reduction result in <barred_mode>; operands 1
;; and 2 are printed as the next two "bar.red" arguments; operand 4 is the
;; BImode input, printed with the "%p3" modifier of constant operand 3 in
;; front of it.  The insn is not predicable.
2390 (define_insn "nvptx_barred_<barred_op>"
2391 [(set (match_operand:<barred_mode> 0 "nvptx_register_operand" "=R")
2393 [(match_operand:SI 1 "nvptx_nonmemory_operand" "Ri")
2394 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
2395 (match_operand:SI 3 "const_int_operand" "i")
2396 (match_operand:BI 4 "nvptx_register_operand" "R")]
2399 "\\tbar.red.<barred_op>.<barred_ptxtype> \\t%0, %1, %2, %p3%4;"
2400 [(set_attr "predicable" "no")])
;; Uniform-warp check.  Prints a fixed, NULL-terminated list of assembly
;; lines: a predicate register %r_sync is declared and set to 1, passed
;; through "vote.all.pred", and "trap" and "exit" are printed guarded by
;; "@!%r_sync", so they execute only if the vote result is false.
2402 (define_insn "nvptx_uniform_warp_check"
2403 [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
2406 const char *insns[] = {
2408 "\\t" ".reg.pred" "\\t" "%%r_sync;",
2409 "\\t" "mov.pred" "\\t" "%%r_sync, 1;",
2410 "%.\\t" "vote.all.pred" "\\t" "%%r_sync, 1;",
2411 "@!%%r_sync\\t" "trap;",
2412 "@!%%r_sync\\t" "exit;",
2416 for (const char **p = &insns[0]; *p != NULL; p++)
2417 output_asm_insn (*p, NULL);
;; Expander for a full memory barrier.  Operand 0 is created here as a
;; volatile BLKmode MEM whose address is a Pmode SCRATCH, and the
;; resulting UNSPECV_MEMBAR pattern is expected to be matched by the
;; "*memory_barrier" insn.
2421 (define_expand "memory_barrier"
2423 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
2426 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2427 MEM_VOLATILE_P (operands[0]) = 1;
2430 ;; PTX defines the memory barriers membar.cta, membar.gl and membar.sys
2431 ;; (corresponding to the CUDA functions threadfence_block, threadfence and
2432 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
2433 ;; may be overly conservative, but before using membar.gl instead we'll need
2434 ;; to explain in detail why it's safe to use. For now, use membar.sys.
;; Insn matching the UNSPECV_MEMBAR pattern on a BLKmode operand; operand 0
;; has no predicate or constraint.  The insn is not predicable.
2435 (define_insn "*memory_barrier"
2436 [(set (match_operand:BLK 0 "" "")
2437 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
2440 [(set_attr "predicable" "no")])
;; Expander for the UNSPECV_MEMBAR_CTA barrier.  Operand 0 is created here
;; as a volatile BLKmode MEM whose address is a Pmode SCRATCH.
2442 (define_expand "nvptx_membar_cta"
2444 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
2447 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2448 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPECV_MEMBAR_CTA pattern on a BLKmode operand;
;; operand 0 has no predicate or constraint.  The insn is not predicable.
2451 (define_insn "*nvptx_membar_cta"
2452 [(set (match_operand:BLK 0 "" "")
2453 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
2456 [(set_attr "predicable" "no")])
;; Expander for the UNSPECV_MEMBAR_GL barrier.  Operand 0 is created here
;; as a volatile BLKmode MEM whose address is a Pmode SCRATCH.
2458 (define_expand "nvptx_membar_gl"
2460 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
2463 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
2464 MEM_VOLATILE_P (operands[0]) = 1;
;; Insn matching the UNSPECV_MEMBAR_GL pattern on a BLKmode operand;
;; operand 0 has no predicate or constraint.  The insn is not predicable.
2467 (define_insn "*nvptx_membar_gl"
2468 [(set (match_operand:BLK 0 "" "")
2469 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
2472 [(set_attr "predicable" "no")])
;; Prints the directive .pragma "nounroll"; into the assembly output.
;; The insn is not predicable.
2474 (define_insn "nvptx_nounroll"
2475 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
2477 "\\t.pragma \\\"nounroll\\\";"
2478 [(set_attr "predicable" "no")])
;; Sets DImode operand 0 from an unspec_volatile of constant operand 1.
;; The assembly text is produced entirely by nvptx_output_red_partition,
;; which receives both operands.  The insn is not predicable.
2480 (define_insn "nvptx_red_partition"
2481 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
2482 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
2486 return nvptx_output_red_partition (operands[0], operands[1]);
2488 [(set_attr "predicable" "no")])
;; Code iterators for the QImode expanders, grouped by operand count and by
;; whether the QImode inputs are widened as signed (any_sbinary,
;; any_sunary) or unsigned (any_ubinary, any_uunary) values.  Each code
;; attribute maps an RTL code to the stem of the standard pattern name,
;; which is used to form both the QImode expander name and the
;; gen_<stem>si3 / gen_<stem>si2 call.
2490 ;; Expand QI mode operations using SI mode instructions.
2491 (define_code_iterator any_sbinary [plus minus smin smax])
2492 (define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")])
2494 (define_code_iterator any_ubinary [and ior xor umin umax])
2495 (define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin")
2498 (define_code_iterator any_sunary [neg abs])
2499 (define_code_attr sunary [(neg "neg") (abs "abs")])
2501 (define_code_iterator any_uunary [not])
2502 (define_code_attr uunary [(not "one_cmpl")])
;; QImode plus/minus/smin/smax, done in SImode.  Both inputs are widened
;; with convert_modes using an unsignedp argument of 0, the SImode
;; operation is emitted into a fresh pseudo, and the result is truncated
;; back into operand 0.  For MINUS the first input is additionally forced
;; into a register.
2504 (define_expand "<sbinary>qi3"
2505 [(set (match_operand:QI 0 "nvptx_register_operand")
2506 (any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
2507 (match_operand:QI 2 "nvptx_nonmemory_operand")))]
2510 rtx reg = gen_reg_rtx (SImode);
2511 rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
2512 rtx op1 = convert_modes (SImode, QImode, operands[2], 0);
2513 if (<CODE> == MINUS)
2514 op0 = force_reg (SImode, op0);
2515 emit_insn (gen_<sbinary>si3 (reg, op0, op1));
2516 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode and/ior/xor/umin/umax, done in SImode.  Both inputs are widened
;; with convert_modes using an unsignedp argument of 1, the SImode
;; operation is emitted into a fresh pseudo, and the result is truncated
;; back into operand 0.
2520 (define_expand "<ubinary>qi3"
2521 [(set (match_operand:QI 0 "nvptx_register_operand")
2522 (any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")
2523 (match_operand:QI 2 "nvptx_nonmemory_operand")))]
2526 rtx reg = gen_reg_rtx (SImode);
2527 rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
2528 rtx op1 = convert_modes (SImode, QImode, operands[2], 1);
2529 emit_insn (gen_<ubinary>si3 (reg, op0, op1));
2530 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode neg/abs, done in SImode.  The input is widened with
;; convert_modes using an unsignedp argument of 0, the SImode operation is
;; emitted into a fresh pseudo, and the result is truncated back into
;; operand 0.
2534 (define_expand "<sunary>qi2"
2535 [(set (match_operand:QI 0 "nvptx_register_operand")
2536 (any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
2539 rtx reg = gen_reg_rtx (SImode);
2540 rtx op0 = convert_modes (SImode, QImode, operands[1], 0);
2541 emit_insn (gen_<sunary>si2 (reg, op0));
2542 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; QImode bitwise not (one_cmplqi2), done in SImode.  The input is widened
;; with convert_modes using an unsignedp argument of 1, the SImode
;; operation is emitted into a fresh pseudo, and the result is truncated
;; back into operand 0.
2546 (define_expand "<uunary>qi2"
2547 [(set (match_operand:QI 0 "nvptx_register_operand")
2548 (any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))]
2551 rtx reg = gen_reg_rtx (SImode);
2552 rtx op0 = convert_modes (SImode, QImode, operands[1], 1);
2553 emit_insn (gen_<uunary>si2 (reg, op0));
2554 emit_insn (gen_truncsiqi2 (operands[0], reg));
;; Store the result of a QImode comparison into SImode operand 0.
;; Both QImode inputs are widened to SImode, unsigned if
;; unsigned_condition_p holds for the comparison code and signed
;; otherwise.  The comparison is then emitted in SImode into a fresh
;; BImode pseudo, which gen_setccsi_from_bi converts into operand 0.
2558 (define_expand "cstoreqi4"
2559 [(set (match_operand:SI 0 "nvptx_register_operand")
2560 (match_operator:SI 1 "nvptx_comparison_operator"
2561 [(match_operand:QI 2 "nvptx_nonmemory_operand")
2562 (match_operand:QI 3 "nvptx_nonmemory_operand")]))]
2565 rtx reg = gen_reg_rtx (BImode);
2566 enum rtx_code code = GET_CODE (operands[1]);
2567 int unsignedp = unsigned_condition_p (code);
2568 rtx op2 = convert_modes (SImode, QImode, operands[2], unsignedp);
2569 rtx op3 = convert_modes (SImode, QImode, operands[3], unsignedp);
2570 rtx cmp = gen_rtx_fmt_ee (code, SImode, op2, op3);
2571 emit_insn (gen_cmpsi (reg, cmp, op2, op3));
2572 emit_insn (gen_setccsi_from_bi (operands[0], reg));
;; Truncation of SImode register operand 1 to QImode, widened back into
;; SImode operand 0, printed as a single "cvt.s32.s8" (signed source and
;; destination types, so presumably the sign-extending variant).
2576 (define_insn "*ext_truncsi2_qi"
2577 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2579 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
2581 "%.\\tcvt.s32.s8\\t%0, %1;")
;; Truncation of SImode register operand 1 to QImode, widened back into
;; SImode operand 0, printed as a single "cvt.u32.u8" (unsigned source and
;; destination types, so presumably the zero-extending variant).
2583 (define_insn "*zext_truncsi2_qi"
2584 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
2586 (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))]
2588 "%.\\tcvt.u32.u8\\t%0, %1;")