gcc/config/pru/pru.cc
1 /* Target machine subroutines for TI PRU.
2 Copyright (C) 2014-2024 Free Software Foundation, Inc.
3 Dimitar Dimitrov <dimitar@dinux.eu>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "df.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "output.h"
41 #include "insn-attr.h"
42 #include "flags.h"
43 #include "explow.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "expr.h"
47 #include "toplev.h"
48 #include "langhooks.h"
49 #include "cfgrtl.h"
50 #include "stor-layout.h"
51 #include "dumpfile.h"
52 #include "builtins.h"
53 #include "pru-protos.h"
55 /* This file should be included last. */
56 #include "target-def.h"
58 #define INIT_ARRAY_ENTRY_BYTES 2
60 /* Global PRU CTABLE entries, filled in by pragmas, and used for fast
61 addressing via LBCO/SBCO instructions. */
62 struct pru_ctable_entry pru_ctable[32];
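/* Illustrative sketch: an entry is typically populated from user code
   via the PRU-specific pragma, e.g.

     #pragma ctable_entry 12 0x48040000

   (the index and address here are only examples).  Accesses to objects
   placed at such a base can then be emitted as single LBCO/SBCO
   instructions using that CTABLE index.  */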
64 /* Forward function declarations. */
65 static bool prologue_saved_reg_p (int);
66 static void pru_reorg_loop (rtx_insn *);
68 struct GTY (()) machine_function
70 /* Current frame information, to be filled in by pru_compute_frame_layout
71 with register save masks, and offsets for the current function. */
73 /* Mask of registers to save. */
74 HARD_REG_SET save_mask;
75 /* Number of bytes that the entire frame takes up. */
76 int total_size;
77 /* Number of bytes that variables take up. */
78 int var_size;
79 /* Number of bytes that outgoing arguments take up. */
80 int out_args_size;
81 /* Number of bytes needed to store registers in frame. */
82 int save_reg_size;
83 /* Offset from new stack pointer to store registers. */
84 int save_regs_offset;
85 /* True if final frame layout is already calculated. */
86 bool initialized;
87 /* Number of doloop tags used so far. */
88 int doloop_tags;
89 /* True if the last tag was allocated to a doloop_end. */
90 bool doloop_tag_from_end;
93 /* Stack layout and calling conventions.
95 The PRU ABI defines r4 as Argument Pointer. GCC implements the same
96 semantics, but represents it with HARD_FRAME_POINTER_REGNUM and
97 names it FP. The stack layout is shown below:
99 ---------------------- high address
100 | incoming args
101 ------call-boundary---
102 | pretend_args ^
103 FP ---------------- | total
104 | save_regs | frame
105 --------------- | size
106 | local vars |
107 --------------- |
108 | outgoing args V
109 SP ---------------------- low address
113 #define PRU_STACK_ALIGN(LOC) ROUND_UP ((LOC), STACK_BOUNDARY / BITS_PER_UNIT)
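/* Worked example: with STACK_BOUNDARY / BITS_PER_UNIT == 4,
   PRU_STACK_ALIGN (5) == 8 and PRU_STACK_ALIGN (8) == 8, i.e. LOC is
   rounded up to the next multiple of the stack boundary in bytes.  */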
115 /* Implement TARGET_COMPUTE_FRAME_LAYOUT. */
116 static void
117 pru_compute_frame_layout (void)
119 int regno;
120 HARD_REG_SET *save_mask;
121 int total_size;
122 int var_size;
123 int out_args_size;
124 int save_reg_size;
126 gcc_assert (!cfun->machine->initialized);
128 save_mask = &cfun->machine->save_mask;
129 CLEAR_HARD_REG_SET (*save_mask);
131 var_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) get_frame_size ());
132 out_args_size = PRU_STACK_ALIGN ((HOST_WIDE_INT) crtl->outgoing_args_size);
133 total_size = var_size + out_args_size;
135 /* Calculate space needed for gp registers. */
136 save_reg_size = 0;
137 for (regno = 0; regno <= LAST_GP_REGNUM; regno++)
138 if (prologue_saved_reg_p (regno))
140 SET_HARD_REG_BIT (*save_mask, regno);
141 save_reg_size += 1;
144 save_reg_size = PRU_STACK_ALIGN (save_reg_size);
145 total_size += save_reg_size;
146 total_size += PRU_STACK_ALIGN (crtl->args.pretend_args_size);
148 /* Save other computed information. */
149 cfun->machine->total_size = total_size;
150 cfun->machine->var_size = var_size;
151 cfun->machine->out_args_size = out_args_size;
152 cfun->machine->save_reg_size = save_reg_size;
153 cfun->machine->initialized = reload_completed;
154 cfun->machine->save_regs_offset = out_args_size + var_size;
157 /* Emit efficient RTL equivalent of ADD3 with the given const_int for
158 frame-related registers.
159 op0 - Destination register.
160 op1 - First addendum operand (a register).
161 addendum - Second addendum operand (a constant).
162 kind - Note kind. REG_NOTE_MAX if no note should be added. */
164 static rtx
165 pru_add3_frame_adjust (rtx op0, rtx op1, int addendum,
166 const enum reg_note kind)
168 rtx insn;
170 rtx op0_adjust = gen_rtx_SET (op0, plus_constant (Pmode, op1, addendum));
172 if (UBYTE_INT (addendum) || UBYTE_INT (-addendum))
173 insn = emit_insn (op0_adjust);
174 else
176 /* Help the compiler to cope with an arbitrary integer constant.
177 Reload has finished so we can't expect the compiler to
178 auto-allocate a temporary register. But we know that call-saved
179 registers are not live yet, so we utilize them. */
180 rtx tmpreg = gen_rtx_REG (Pmode, PROLOGUE_TEMP_REGNUM);
181 if (addendum < 0)
183 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (-addendum, Pmode)));
184 insn = emit_insn (gen_sub3_insn (op0, op1, tmpreg));
186 else
188 emit_insn (gen_rtx_SET (tmpreg, gen_int_mode (addendum, Pmode)));
189 insn = emit_insn (gen_add3_insn (op0, op1, tmpreg));
193 /* Attach a note indicating what happened. */
194 if (kind != REG_NOTE_MAX)
195 add_reg_note (insn, kind, copy_rtx (op0_adjust));
197 RTX_FRAME_RELATED_P (insn) = 1;
199 return insn;
202 /* Add a const_int to the stack pointer register. */
203 static rtx
204 pru_add_to_sp (int addendum, const enum reg_note kind)
206 return pru_add3_frame_adjust (stack_pointer_rtx, stack_pointer_rtx,
207 addendum, kind);
210 /* Helper function used during prologue/epilogue. Emits a single LBBO/SBBO
211 instruction for load/store of the next group of consecutive registers. */
212 static int
213 xbbo_next_reg_cluster (int regno_start, int *sp_offset, bool do_store)
215 int regno, nregs, i;
216 rtx addr;
217 rtx_insn *insn;
219 nregs = 0;
221 /* Skip the empty slots. */
222 for (; regno_start <= LAST_GP_REGNUM;)
223 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno_start))
224 break;
225 else
226 regno_start++;
228 /* Find the largest consecutive group of registers to save. */
229 for (regno = regno_start; regno <= LAST_GP_REGNUM;)
230 if (TEST_HARD_REG_BIT (cfun->machine->save_mask, regno))
232 regno++;
233 nregs++;
235 else
236 break;
238 if (!nregs)
239 return -1;
241 gcc_assert (UBYTE_INT (*sp_offset));
243 /* Ok, save this bunch. */
244 addr = plus_constant (Pmode, stack_pointer_rtx, *sp_offset);
246 if (do_store)
247 insn = targetm.gen_store_multiple (gen_frame_mem (BLKmode, addr),
248 gen_rtx_REG (QImode, regno_start),
249 GEN_INT (nregs));
250 else
251 insn = targetm.gen_load_multiple (gen_rtx_REG (QImode, regno_start),
252 gen_frame_mem (BLKmode, addr),
253 GEN_INT (nregs));
255 gcc_assert (reload_completed);
256 gcc_assert (insn);
257 emit_insn (insn);
259 /* Tag as frame-related. */
260 RTX_FRAME_RELATED_P (insn) = 1;
262 if (!do_store)
264 /* Tag epilogue unwind notes. */
265 for (i = regno_start; i < (regno_start + nregs); i++)
266 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (QImode, i));
269 /* Increment and save offset in anticipation of the next register group. */
270 *sp_offset += nregs * UNITS_PER_WORD;
272 return regno_start + nregs;
275 /* Emit function prologue. */
276 void
277 pru_expand_prologue (void)
279 int regno_start;
280 int total_frame_size;
281 int sp_offset; /* Offset from base_reg to final stack value. */
282 int save_regs_base; /* Offset from base_reg to register save area. */
283 int save_offset; /* Temporary offset to currently saved register group. */
285 total_frame_size = cfun->machine->total_size;
287 if (flag_stack_usage_info)
288 current_function_static_stack_size = total_frame_size;
290 /* Decrement the stack pointer. */
291 if (!UBYTE_INT (total_frame_size))
293 /* We need an intermediary point, this will point at the spill block. */
294 pru_add_to_sp (cfun->machine->save_regs_offset - total_frame_size,
295 REG_NOTE_MAX);
296 save_regs_base = 0;
297 sp_offset = -cfun->machine->save_regs_offset;
299 else if (total_frame_size)
301 pru_add_to_sp (- total_frame_size, REG_NOTE_MAX);
302 save_regs_base = cfun->machine->save_regs_offset;
303 sp_offset = 0;
305 else
306 save_regs_base = sp_offset = 0;
308 regno_start = 0;
309 save_offset = save_regs_base;
311 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, true);
312 while (regno_start >= 0);
314 /* Set FP before adjusting SP. This way fp_offset has a
315 better chance of fitting in a UBYTE. */
316 if (frame_pointer_needed)
318 int fp_offset = total_frame_size
319 - crtl->args.pretend_args_size
320 + sp_offset;
322 pru_add3_frame_adjust (hard_frame_pointer_rtx, stack_pointer_rtx,
323 fp_offset, REG_NOTE_MAX);
326 if (sp_offset)
327 pru_add_to_sp (sp_offset, REG_FRAME_RELATED_EXPR);
329 /* If we are profiling, make sure no instructions are scheduled before
330 the call to mcount. */
331 if (crtl->profile)
332 emit_insn (gen_blockage ());
335 /* Emit function epilogue. */
336 void
337 pru_expand_epilogue (bool sibcall_p)
339 int total_frame_size;
340 int sp_adjust, save_offset;
341 int regno_start;
343 if (!sibcall_p && pru_can_use_return_insn ())
345 emit_jump_insn (gen_return ());
346 return;
349 emit_insn (gen_blockage ());
351 total_frame_size = cfun->machine->total_size;
353 if (frame_pointer_needed)
355 /* Recover the stack pointer. */
356 pru_add3_frame_adjust (stack_pointer_rtx, hard_frame_pointer_rtx,
357 - cfun->machine->save_reg_size,
358 REG_CFA_ADJUST_CFA);
360 save_offset = 0;
361 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
363 else if (!UBYTE_INT (total_frame_size))
365 pru_add_to_sp (cfun->machine->save_regs_offset, REG_CFA_ADJUST_CFA);
366 save_offset = 0;
367 sp_adjust = total_frame_size - cfun->machine->save_regs_offset;
369 else
371 save_offset = cfun->machine->save_regs_offset;
372 sp_adjust = total_frame_size;
375 regno_start = 0;
377 regno_start = xbbo_next_reg_cluster (regno_start, &save_offset, false);
378 while (regno_start >= 0);
380 /* Emit a blockage insn here to keep these insns from being moved to
381 an earlier spot in the epilogue.
383 This is necessary as we must not cut the stack back before all the
384 restores are finished. */
385 emit_insn (gen_blockage ());
387 if (sp_adjust)
388 pru_add_to_sp (sp_adjust, REG_CFA_ADJUST_CFA);
390 if (!sibcall_p)
391 emit_jump_insn (gen_simple_return ());
394 /* Implement RETURN_ADDR_RTX. Note, we do not support moving
395 back to a previous frame. */
397 pru_get_return_address (int count)
399 if (count != 0)
400 return NULL_RTX;
402 /* Return r3.w2. */
403 return get_hard_reg_initial_val (HImode, RA_REGNUM);
406 /* Implement FUNCTION_PROFILER macro. */
407 void
408 pru_function_profiler (FILE *file, int)
410 fprintf (file, "\tmov\tr1, ra\n");
411 fprintf (file, "\tcall\t_mcount\n");
412 fprintf (file, "\tmov\tra, r1\n");
415 /* Dump stack layout. */
416 static void
417 pru_dump_frame_layout (FILE *file)
419 fprintf (file, "\t%s Current Frame Info\n", ASM_COMMENT_START);
420 fprintf (file, "\t%s total_size = %d\n", ASM_COMMENT_START,
421 cfun->machine->total_size);
422 fprintf (file, "\t%s var_size = %d\n", ASM_COMMENT_START,
423 cfun->machine->var_size);
424 fprintf (file, "\t%s out_args_size = %d\n", ASM_COMMENT_START,
425 cfun->machine->out_args_size);
426 fprintf (file, "\t%s save_reg_size = %d\n", ASM_COMMENT_START,
427 cfun->machine->save_reg_size);
428 fprintf (file, "\t%s initialized = %d\n", ASM_COMMENT_START,
429 cfun->machine->initialized);
430 fprintf (file, "\t%s save_regs_offset = %d\n", ASM_COMMENT_START,
431 cfun->machine->save_regs_offset);
432 fprintf (file, "\t%s is_leaf = %d\n", ASM_COMMENT_START,
433 crtl->is_leaf);
434 fprintf (file, "\t%s frame_pointer_needed = %d\n", ASM_COMMENT_START,
435 frame_pointer_needed);
436 fprintf (file, "\t%s pretend_args_size = %d\n", ASM_COMMENT_START,
437 crtl->args.pretend_args_size);
440 /* Return true if REGNO should be saved in the prologue. */
441 static bool
442 prologue_saved_reg_p (int regno)
444 gcc_assert (GP_REG_P (regno));
446 /* Do not save the register if function will not return. */
447 if (TREE_THIS_VOLATILE (current_function_decl))
448 return false;
450 if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
451 return true;
453 /* 32-bit FP. */
454 if (frame_pointer_needed
455 && regno >= HARD_FRAME_POINTER_REGNUM
456 && regno < HARD_FRAME_POINTER_REGNUM + GET_MODE_SIZE (Pmode))
457 return true;
459 /* 16-bit RA. */
460 if (regno == RA_REGNUM && df_regs_ever_live_p (RA_REGNUM))
461 return true;
462 if (regno == RA_REGNUM + 1 && df_regs_ever_live_p (RA_REGNUM + 1))
463 return true;
465 return false;
468 /* Implement TARGET_CAN_ELIMINATE. */
469 static bool
470 pru_can_eliminate (const int, const int to)
472 if (to == STACK_POINTER_REGNUM)
473 return !frame_pointer_needed;
474 return true;
477 /* Implement INITIAL_ELIMINATION_OFFSET macro. */
479 pru_initial_elimination_offset (int from, int to)
481 int offset;
483 /* Set OFFSET to the offset from the stack pointer. */
484 switch (from)
486 case FRAME_POINTER_REGNUM:
487 offset = cfun->machine->out_args_size;
488 break;
490 case ARG_POINTER_REGNUM:
491 offset = cfun->machine->total_size;
492 offset -= crtl->args.pretend_args_size;
493 break;
495 default:
496 gcc_unreachable ();
499 /* If we are asked for the frame pointer offset, then adjust OFFSET
500 by the offset from the frame pointer to the stack pointer. */
501 if (to == HARD_FRAME_POINTER_REGNUM)
502 offset -= cfun->machine->total_size - crtl->args.pretend_args_size;
505 return offset;
508 /* Return nonzero if this function is known to have a null epilogue.
509 This allows the optimizer to omit jumps to jumps if no stack
510 was created. */
512 pru_can_use_return_insn (void)
514 if (!reload_completed || crtl->profile)
515 return 0;
517 return cfun->machine->total_size == 0;
520 /* Implement `TARGET_CLASS_LIKELY_SPILLED_P'. The original intention
521 of the default implementation is kept, but is adjusted for PRU.
522 Return TRUE if the given class C contains a single SImode
523 (as opposed to word_mode!) register. */
525 static bool
526 pru_class_likely_spilled_p (reg_class_t c)
528 return (reg_class_size[(int) c] <= GET_MODE_SIZE (SImode));
531 /* Implement TARGET_HARD_REGNO_MODE_OK. */
533 static bool
534 pru_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
536 switch (GET_MODE_SIZE (mode))
538 case 1: return true;
539 case 2: return (regno % 4) <= 2;
540 case 4: return (regno % 4) == 0;
541 case 8: return (regno % 4) == 0;
542 case 16: return (regno % 4) == 0; /* Not sure why TImode is used. */
543 case 32: return (regno % 4) == 0; /* Not sure why CTImode is used. */
544 default:
545 /* TODO: Find out why VOIDmode and BLKmode are passed. */
546 gcc_assert (mode == BLKmode || mode == VOIDmode);
547 return (regno % 4) == 0;
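/* Illustration: each hard register is one byte wide, so a QImode value
   may live in any register, an HImode value must start at byte 0, 1 or 2
   of a 32-bit register (rN.w0/.w1/.w2), and SImode or wider values must
   start at byte 0 (a full rN).  */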
551 /* Implement `TARGET_HARD_REGNO_SCRATCH_OK'.
552 Returns true if REGNO is safe to be allocated as a scratch
553 register (for a define_peephole2) in the current function. */
555 static bool
556 pru_hard_regno_scratch_ok (unsigned int regno)
558 /* Don't allow hard registers that might be part of the frame pointer.
559 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
560 and don't handle a frame pointer that spans more than one register.
561 TODO: Fix those faulty places. */
563 if ((!reload_completed || frame_pointer_needed)
564 && (IN_RANGE (regno, HARD_FRAME_POINTER_REGNUM,
565 HARD_FRAME_POINTER_REGNUM + 3)
566 || IN_RANGE (regno, FRAME_POINTER_REGNUM,
567 FRAME_POINTER_REGNUM + 3)))
568 return false;
570 return true;
574 /* Worker function for `HARD_REGNO_RENAME_OK'.
575 Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
578 pru_hard_regno_rename_ok (unsigned int old_reg,
579 unsigned int new_reg)
581 /* Don't allow hard registers that might be part of the frame pointer.
582 Some places in the compiler just test for [HARD_]FRAME_POINTER_REGNUM
583 and don't care for a frame pointer that spans more than one register.
584 TODO: Fix those faulty places. */
585 if ((!reload_completed || frame_pointer_needed)
586 && (IN_RANGE (old_reg, HARD_FRAME_POINTER_REGNUM,
587 HARD_FRAME_POINTER_REGNUM + 3)
588 || IN_RANGE (old_reg, FRAME_POINTER_REGNUM,
589 FRAME_POINTER_REGNUM + 3)
590 || IN_RANGE (new_reg, HARD_FRAME_POINTER_REGNUM,
591 HARD_FRAME_POINTER_REGNUM + 3)
592 || IN_RANGE (new_reg, FRAME_POINTER_REGNUM,
593 FRAME_POINTER_REGNUM + 3)))
594 return 0;
596 return 1;
599 /* Allocate a chunk of memory for per-function machine-dependent data. */
600 static struct machine_function *
601 pru_init_machine_status (void)
603 return ggc_cleared_alloc<machine_function> ();
606 /* Implement TARGET_OPTION_OVERRIDE. */
607 static void
608 pru_option_override (void)
610 #ifdef SUBTARGET_OVERRIDE_OPTIONS
611 SUBTARGET_OVERRIDE_OPTIONS;
612 #endif
614 /* Check for unsupported options. */
615 if (flag_pic == 1)
616 warning (OPT_fpic, "%<-fpic%> is not supported");
617 if (flag_pic == 2)
618 warning (OPT_fPIC, "%<-fPIC%> is not supported");
619 if (flag_pie == 1)
620 warning (OPT_fpie, "%<-fpie%> is not supported");
621 if (flag_pie == 2)
622 warning (OPT_fPIE, "%<-fPIE%> is not supported");
624 /* QBxx conditional branching cannot cope with block reordering. */
625 if (flag_reorder_blocks_and_partition)
627 inform (input_location, "%<-freorder-blocks-and-partition%> "
628 "not supported on this architecture");
629 flag_reorder_blocks_and_partition = 0;
630 flag_reorder_blocks = 1;
633 /* Function to allocate machine-dependent function status. */
634 init_machine_status = &pru_init_machine_status;
636 /* Save the initial options in case the user does function specific
637 options. */
638 target_option_default_node = target_option_current_node
639 = build_target_option_node (&global_options, &global_options_set);
642 /* Compute a (partial) cost for rtx X. Return true if the complete
643 cost has been computed, and false if subexpressions should be
644 scanned. In either case, *TOTAL contains the cost result. */
645 static bool
646 pru_rtx_costs (rtx x, machine_mode mode, int outer_code, int, int *total, bool)
648 const int code = GET_CODE (x);
650 switch (code)
652 case CONST_INT:
653 if ((mode == VOIDmode && UBYTE_INT (INTVAL (x)))
654 || (mode != VOIDmode && const_ubyte_operand (x, mode)))
656 *total = COSTS_N_INSNS (0);
657 return true;
659 else if ((mode == VOIDmode && UHWORD_INT (INTVAL (x)))
660 || (mode != VOIDmode && const_uhword_operand (x, mode)))
662 *total = COSTS_N_INSNS (1);
663 return true;
665 else if (outer_code == MEM && ctable_addr_operand (x, VOIDmode))
667 *total = COSTS_N_INSNS (0);
668 return true;
670 else
672 *total = COSTS_N_INSNS (2);
673 return true;
676 case LABEL_REF:
677 case SYMBOL_REF:
678 case CONST:
680 *total = COSTS_N_INSNS (1);
681 return true;
683 case CONST_DOUBLE:
685 *total = COSTS_N_INSNS (2);
686 return true;
688 case CONST_WIDE_INT:
690 /* PRU declares no vector or very large integer types. */
691 gcc_unreachable ();
692 return true;
694 case SET:
696 int factor;
698 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
699 the mode for the factor. */
700 mode = GET_MODE (SET_DEST (x));
702 /* SI move has the same cost as a QI move. Moves larger than
703 64 bits are costly. */
704 factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
705 *total = factor * COSTS_N_INSNS (1);
707 return false;
710 case MULT:
712 /* Factor in that "mul" requires fixed registers, which
713 would likely require register moves. */
714 *total = COSTS_N_INSNS (7);
715 return false;
717 case PLUS:
719 rtx op0 = XEXP (x, 0);
720 rtx op1 = XEXP (x, 1);
721 machine_mode op1_mode = GET_MODE (op1);
723 /* Generic RTL address expressions do not enforce mode for
724 offsets, yet our UBYTE constraint requires it. Fix it here. */
725 if (op1_mode == VOIDmode && CONST_INT_P (op1) && outer_code == MEM)
726 op1_mode = Pmode;
727 if (outer_code == MEM
728 && ((REG_P (op0) && reg_or_ubyte_operand (op1, op1_mode))
729 || ctable_addr_operand (op0, VOIDmode)
730 || ctable_addr_operand (op1, VOIDmode)
731 || (ctable_base_operand (op0, VOIDmode) && REG_P (op1))
732 || (ctable_base_operand (op1, VOIDmode) && REG_P (op0))))
734 /* CTABLE or REG base addressing - PLUS comes for free. */
735 *total = COSTS_N_INSNS (0);
736 return true;
738 else
740 *total = COSTS_N_INSNS (1);
741 return false;
744 case SIGN_EXTEND:
746 *total = COSTS_N_INSNS (3);
747 return false;
749 case ASHIFTRT:
751 rtx op1 = XEXP (x, 1);
752 if (const_1_operand (op1, VOIDmode))
753 *total = COSTS_N_INSNS (3);
754 else
755 *total = COSTS_N_INSNS (7);
756 return false;
758 case ZERO_EXTRACT:
760 rtx op2 = XEXP (x, 2);
761 if ((outer_code == EQ || outer_code == NE)
762 && CONST_INT_P (op2)
763 && INTVAL (op2) == 1)
765 /* Branch if bit is set/clear is a single instruction. */
766 *total = COSTS_N_INSNS (0);
767 return true;
769 else
771 *total = COSTS_N_INSNS (2);
772 return false;
775 case ZERO_EXTEND:
777 /* 64-bit zero extensions actually have a cost because they
778 require setting a register to zero.
779 32-bit and smaller are free. */
780 int factor = (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (SImode)) ? 0 : 1;
781 *total = factor * COSTS_N_INSNS (1);
782 return false;
785 default:
787 /* PRU ALU is 32 bit, despite GCC's UNITS_PER_WORD=1. */
788 int factor = CEIL (GET_MODE_SIZE (mode), GET_MODE_SIZE (SImode));
789 *total = factor * COSTS_N_INSNS (1);
790 return false;
795 /* Calculate the cost of an addressing mode that contains ADDR.
796 ADDR must be a valid address. */
798 static int
799 pru_address_cost (rtx addr, machine_mode, addr_space_t as, bool)
801 if (as != ADDR_SPACE_GENERIC)
802 /* All currently implemented special address spaces for PRU
803 are much more efficient than generic memory I/O. */
804 return 0;
805 else if (ctable_addr_operand (addr, VOIDmode)
806 || (GET_CODE (addr) == PLUS
807 && ctable_base_operand (XEXP (addr, 1), VOIDmode)))
808 /* Using CTABLE instructions reduces register pressure,
809 so give it precedence. */
810 return 1;
811 else
812 /* Same two instructions (LBBO/SBBO) are used for any valid
813 addressing mode. */
814 return 2;
817 /* Insn costs on PRU are straightforward because:
818 - Insns emit 0, 1 or more instructions.
819 - All instructions are 32-bit length.
820 - All instructions execute in 1 cycle (sans memory access delays).
821 The "length" attribute maps nicely to the insn cost. */
823 static int
824 pru_insn_cost (rtx_insn *insn, bool speed)
826 /* Use generic cost calculation for unrecognized insns. */
827 if (recog_memoized (insn) < 0)
828 return pattern_cost (insn, speed);
830 unsigned int len = get_attr_length (insn);
832 gcc_assert ((len % 4) == 0);
834 int cost = COSTS_N_INSNS (len / 4);
835 /* Some insns have zero length (e.g. blockage, pruloop_end).
836 In such cases give the minimum cost, because a return of
837 0 would incorrectly indicate that the insn cost is unknown. */
838 if (cost == 0)
839 cost = 1;
841 /* Writes are usually posted, so they take 1 cycle. Reads
842 from DMEM usually take 3 cycles.
843 See TI document SPRACE8A, Device-Specific PRU Read Latency Values. */
844 if (speed && get_attr_type (insn) == TYPE_LD)
845 cost += COSTS_N_INSNS (2);
847 return cost;
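/* Worked example: a recognized insn with a "length" attribute of 8
   (two PRU instructions) costs COSTS_N_INSNS (2); if it is a DMEM load
   and we are optimizing for speed, COSTS_N_INSNS (2) is added on top.  */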
850 static GTY(()) rtx eqdf_libfunc;
851 static GTY(()) rtx nedf_libfunc;
852 static GTY(()) rtx ledf_libfunc;
853 static GTY(()) rtx ltdf_libfunc;
854 static GTY(()) rtx gedf_libfunc;
855 static GTY(()) rtx gtdf_libfunc;
856 static GTY(()) rtx eqsf_libfunc;
857 static GTY(()) rtx nesf_libfunc;
858 static GTY(()) rtx lesf_libfunc;
859 static GTY(()) rtx ltsf_libfunc;
860 static GTY(()) rtx gesf_libfunc;
861 static GTY(()) rtx gtsf_libfunc;
863 /* Implement the TARGET_INIT_LIBFUNCS macro. We use this to rename library
864 functions to match the PRU ABI. */
866 static void
867 pru_init_libfuncs (void)
869 /* Double-precision floating-point arithmetic. */
870 set_optab_libfunc (add_optab, DFmode, "__pruabi_addd");
871 set_optab_libfunc (sdiv_optab, DFmode, "__pruabi_divd");
872 set_optab_libfunc (smul_optab, DFmode, "__pruabi_mpyd");
873 set_optab_libfunc (neg_optab, DFmode, "__pruabi_negd");
874 set_optab_libfunc (sub_optab, DFmode, "__pruabi_subd");
876 /* Single-precision floating-point arithmetic. */
877 set_optab_libfunc (add_optab, SFmode, "__pruabi_addf");
878 set_optab_libfunc (sdiv_optab, SFmode, "__pruabi_divf");
879 set_optab_libfunc (smul_optab, SFmode, "__pruabi_mpyf");
880 set_optab_libfunc (neg_optab, SFmode, "__pruabi_negf");
881 set_optab_libfunc (sub_optab, SFmode, "__pruabi_subf");
883 /* Floating-point comparisons. */
884 eqsf_libfunc = init_one_libfunc ("__pruabi_eqf");
885 nesf_libfunc = init_one_libfunc ("__pruabi_neqf");
886 lesf_libfunc = init_one_libfunc ("__pruabi_lef");
887 ltsf_libfunc = init_one_libfunc ("__pruabi_ltf");
888 gesf_libfunc = init_one_libfunc ("__pruabi_gef");
889 gtsf_libfunc = init_one_libfunc ("__pruabi_gtf");
890 eqdf_libfunc = init_one_libfunc ("__pruabi_eqd");
891 nedf_libfunc = init_one_libfunc ("__pruabi_neqd");
892 ledf_libfunc = init_one_libfunc ("__pruabi_led");
893 ltdf_libfunc = init_one_libfunc ("__pruabi_ltd");
894 gedf_libfunc = init_one_libfunc ("__pruabi_ged");
895 gtdf_libfunc = init_one_libfunc ("__pruabi_gtd");
897 /* In PRU ABI, much like other TI processors, floating point
898 comparisons return non-standard values. This quirk is handled
899 by disabling the optab library functions, and handling the
900 comparison during RTL expansion. */
901 set_optab_libfunc (eq_optab, SFmode, NULL);
902 set_optab_libfunc (ne_optab, SFmode, NULL);
903 set_optab_libfunc (gt_optab, SFmode, NULL);
904 set_optab_libfunc (ge_optab, SFmode, NULL);
905 set_optab_libfunc (lt_optab, SFmode, NULL);
906 set_optab_libfunc (le_optab, SFmode, NULL);
907 set_optab_libfunc (eq_optab, DFmode, NULL);
908 set_optab_libfunc (ne_optab, DFmode, NULL);
909 set_optab_libfunc (gt_optab, DFmode, NULL);
910 set_optab_libfunc (ge_optab, DFmode, NULL);
911 set_optab_libfunc (lt_optab, DFmode, NULL);
912 set_optab_libfunc (le_optab, DFmode, NULL);
914 /* The isunordered function appears to be supported only by GCC. */
915 set_optab_libfunc (unord_optab, SFmode, "__pruabi_unordf");
916 set_optab_libfunc (unord_optab, DFmode, "__pruabi_unordd");
918 /* Floating-point to integer conversions. */
919 set_conv_libfunc (sfix_optab, SImode, DFmode, "__pruabi_fixdi");
920 set_conv_libfunc (ufix_optab, SImode, DFmode, "__pruabi_fixdu");
921 set_conv_libfunc (sfix_optab, DImode, DFmode, "__pruabi_fixdlli");
922 set_conv_libfunc (ufix_optab, DImode, DFmode, "__pruabi_fixdull");
923 set_conv_libfunc (sfix_optab, SImode, SFmode, "__pruabi_fixfi");
924 set_conv_libfunc (ufix_optab, SImode, SFmode, "__pruabi_fixfu");
925 set_conv_libfunc (sfix_optab, DImode, SFmode, "__pruabi_fixflli");
926 set_conv_libfunc (ufix_optab, DImode, SFmode, "__pruabi_fixfull");
928 /* Conversions between floating types. */
929 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__pruabi_cvtdf");
930 set_conv_libfunc (sext_optab, DFmode, SFmode, "__pruabi_cvtfd");
932 /* Integer to floating-point conversions. */
933 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__pruabi_fltid");
934 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__pruabi_fltud");
935 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__pruabi_fltllid");
936 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__pruabi_fltulld");
937 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__pruabi_fltif");
938 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__pruabi_fltuf");
939 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__pruabi_fltllif");
940 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__pruabi_fltullf");
942 /* Long long. */
943 set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
944 set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
945 set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
946 set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
948 set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
949 set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
950 set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
951 set_optab_libfunc (umod_optab, SImode, "__pruabi_remu");
952 set_optab_libfunc (sdivmod_optab, SImode, "__pruabi_divremi");
953 set_optab_libfunc (udivmod_optab, SImode, "__pruabi_divremu");
954 set_optab_libfunc (sdiv_optab, DImode, "__pruabi_divlli");
955 set_optab_libfunc (udiv_optab, DImode, "__pruabi_divull");
956 set_optab_libfunc (smod_optab, DImode, "__pruabi_remlli");
957 set_optab_libfunc (umod_optab, DImode, "__pruabi_remull");
958 set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
961 /* Given a comparison CODE, return a similar comparison but without
962 the "equals" condition. In other words, it strips GE/GEU/LE/LEU
963 and instead returns GT/GTU/LT/LTU. */
965 enum rtx_code
966 pru_noteq_condition (enum rtx_code code)
968 switch (code)
970 case GT: return GT;
971 case GTU: return GTU;
972 case GE: return GT;
973 case GEU: return GTU;
974 case LT: return LT;
975 case LTU: return LTU;
976 case LE: return LT;
977 case LEU: return LTU;
978 default:
979 gcc_unreachable ();
983 /* Emit comparison instruction if necessary, returning the expression
984 that holds the compare result in the proper mode. Return the comparison
985 that should be used in the jump insn. */
988 pru_expand_fp_compare (rtx comparison, machine_mode mode)
990 enum rtx_code code = GET_CODE (comparison);
991 rtx op0 = XEXP (comparison, 0);
992 rtx op1 = XEXP (comparison, 1);
993 rtx cmp;
994 enum rtx_code jump_code = code;
995 machine_mode op_mode = GET_MODE (op0);
996 rtx_insn *insns;
997 rtx libfunc;
999 gcc_assert (op_mode == DFmode || op_mode == SFmode);
1001 /* FP exceptions are not raised by PRU's softfp implementation. So the
1002 following transformations are safe. */
1003 if (code == UNGE)
1005 code = LT;
1006 jump_code = EQ;
1008 else if (code == UNLE)
1010 code = GT;
1011 jump_code = EQ;
1013 else
1014 jump_code = NE;
1016 switch (code)
1018 case EQ:
1019 libfunc = op_mode == DFmode ? eqdf_libfunc : eqsf_libfunc;
1020 break;
1021 case NE:
1022 libfunc = op_mode == DFmode ? nedf_libfunc : nesf_libfunc;
1023 break;
1024 case GT:
1025 libfunc = op_mode == DFmode ? gtdf_libfunc : gtsf_libfunc;
1026 break;
1027 case GE:
1028 libfunc = op_mode == DFmode ? gedf_libfunc : gesf_libfunc;
1029 break;
1030 case LT:
1031 libfunc = op_mode == DFmode ? ltdf_libfunc : ltsf_libfunc;
1032 break;
1033 case LE:
1034 libfunc = op_mode == DFmode ? ledf_libfunc : lesf_libfunc;
1035 break;
1036 default:
1037 gcc_unreachable ();
1039 start_sequence ();
1041 cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
1042 op0, op_mode, op1, op_mode);
1043 insns = get_insns ();
1044 end_sequence ();
1046 emit_libcall_block (insns, cmp, cmp,
1047 gen_rtx_fmt_ee (code, SImode, op0, op1));
1049 return gen_rtx_fmt_ee (jump_code, mode, cmp, const0_rtx);
1052 /* Return the sign bit position for given OP's mode. */
1053 static int
1054 sign_bit_position (const rtx op)
1056 const int sz = GET_MODE_SIZE (GET_MODE (op));
1058 return sz * 8 - 1;
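/* Example: for an SImode operand this returns 31, the bit tested by the
   QBBS/QBBC sequences emitted below.  */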
1061 /* Parse the given CVAL integer value, and extract the "filling" byte
1062 range of consecutive 0xff byte values. The rest of the bytes must be 0x00.
1063 There must be only one range in the given value. This range would
1064 typically be used to calculate the parameters of
1065 PRU instructions ZERO and FILL.
1067 The parameter MODE determines the maximum byte range to consider
1068 in the given input constant.
1070 Example input:
1071 cval = 0xffffffffffffff00 = -256
1072 mode = SImode
1073 Return value:
1074 start = 1
1075 nbytes = 3
1077 On error, return a range with -1 for START and NBYTES. */
1078 pru_byterange
1079 pru_calc_byterange (HOST_WIDE_INT cval, machine_mode mode)
1081 const pru_byterange invalid_range = { -1, -1 };
1082 pru_byterange r = invalid_range;
1083 enum { ST_FFS, ST_INRANGE, ST_TRAILING_ZEROS } st = ST_FFS;
1084 int i;
1086 for (i = 0; i < GET_MODE_SIZE (mode); i++)
1088 const int b = cval & ((1U << BITS_PER_UNIT) - 1);
1089 cval >>= BITS_PER_UNIT;
1091 if (b == 0x00 && (st == ST_FFS || st == ST_TRAILING_ZEROS))
1092 /* No action. */;
1093 else if (b == 0x00 && st == ST_INRANGE)
1094 st = ST_TRAILING_ZEROS;
1095 else if (b == 0xff && st == ST_FFS)
1097 st = ST_INRANGE;
1098 r.start = i;
1099 r.nbytes = 1;
1101 else if (b == 0xff && st == ST_INRANGE)
1102 r.nbytes++;
1103 else
1104 return invalid_range;
1107 if (st != ST_TRAILING_ZEROS && st != ST_INRANGE)
1108 return invalid_range;
1109 return r;
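/* Further examples (SImode): cval 0x00ffff00 yields start = 1,
   nbytes = 2; cval 0x00ff00ff is rejected because it contains two
   separate 0xff ranges.  */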
1112 /* Branches and compares. */
1114 /* PRU's ALU does not support signed comparison operations. That's why we
1115 emulate them. By first checking the sign bit and handling every possible
1116 operand sign combination, we can simulate signed comparisons in just
1117 5 instructions. See table below.
1119 .-------------------.---------------------------------------------------.
1120 | Operand sign bit | Mapping the signed comparison to an unsigned one |
1121 |---------+---------+------------+------------+------------+------------|
1122 | OP1.b31 | OP2.b31 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1123 |---------+---------+------------+------------+------------+------------|
1124 | 0 | 0 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1125 |---------+---------+------------+------------+------------+------------|
1126 | 0 | 1 | false | false | true | true |
1127 |---------+---------+------------+------------+------------+------------|
1128 | 1 | 0 | true | true | false | false |
1129 |---------+---------+------------+------------+------------+------------|
1130 | 1 | 1 | OP1 < OP2 | OP1 <= OP2 | OP1 > OP2 | OP1 >= OP2 |
1131 `---------'---------'------------'------------'------------+------------'
1134 Given the table above, here is an example for a concrete op:
1136 qbbc OP1_POS, OP1, 31
1137 OP1_NEG: qbbc BRANCH_TAKEN_LABEL, OP2, 31
1138 OP1_NEG_OP2_NEG: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1139 ; jmp OUT -> can be eliminated because we'll take the
1140 ; following branch. OP2.b31 is guaranteed to be 1
1141 ; by the time we get here.
1142 OP1_POS: qbbs OUT, OP2, 31
1143 OP1_POS_OP2_POS: qblt BRANCH_TAKEN_LABEL, OP2, OP1
1144 #if FAR_JUMP
1145 jmp OUT
1146 BRANCH_TAKEN_LABEL: jmp REAL_BRANCH_TAKEN_LABEL
1147 #endif
1148 OUT:
1152 /* Output asm code for a signed-compare LT/LE conditional branch. */
1153 static const char *
1154 pru_output_ltle_signed_cbranch (rtx *operands, bool is_near)
1156 static char buf[1024];
1157 enum rtx_code code = GET_CODE (operands[0]);
1158 rtx op1;
1159 rtx op2;
1160 const char *cmp_opstr;
1161 int bufi = 0;
1163 op1 = operands[1];
1164 op2 = operands[2];
1166 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1168 /* Determine the comparison operators for positive and negative operands. */
1169 if (code == LT)
1170 cmp_opstr = "qblt";
1171 else if (code == LE)
1172 cmp_opstr = "qble";
1173 else
1174 gcc_unreachable ();
1176 if (is_near)
1177 bufi = snprintf (buf, sizeof (buf),
1178 "qbbc\t.+12, %%1, %d\n\t"
1179 "qbbc\t%%l3, %%2, %d\n\t" /* OP1_NEG. */
1180 "%s\t%%l3, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1181 "qbbs\t.+8, %%2, %d\n\t" /* OP1_POS. */
1182 "%s\t%%l3, %%2, %%1", /* OP1_POS_OP2_POS. */
1183 sign_bit_position (op1),
1184 sign_bit_position (op2),
1185 cmp_opstr,
1186 sign_bit_position (op2),
1187 cmp_opstr);
1188 else
1189 bufi = snprintf (buf, sizeof (buf),
1190 "qbbc\t.+12, %%1, %d\n\t"
1191 "qbbc\t.+20, %%2, %d\n\t" /* OP1_NEG. */
1192 "%s\t.+16, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1193 "qbbs\t.+16, %%2, %d\n\t" /* OP1_POS. */
1194 "%s\t.+8, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1195 "jmp\t.+8\n\t" /* jmp OUT. */
1196 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1197 sign_bit_position (op1),
1198 sign_bit_position (op2),
1199 cmp_opstr,
1200 sign_bit_position (op2),
1201 cmp_opstr);
1203 gcc_assert (bufi > 0);
1204 gcc_assert ((unsigned int) bufi < sizeof (buf));
1206 return buf;
1209 /* Output asm code for a signed-compare GT/GE conditional branch. */
1210 static const char *
1211 pru_output_gtge_signed_cbranch (rtx *operands, bool is_near)
1213 static char buf[1024];
1214 enum rtx_code code = GET_CODE (operands[0]);
1215 rtx op1;
1216 rtx op2;
1217 const char *cmp_opstr;
1218 int bufi = 0;
1220 op1 = operands[1];
1221 op2 = operands[2];
1223 gcc_assert (GET_CODE (op1) == REG && GET_CODE (op2) == REG);
1225 /* Determine the comparison operators for positive and negative operands. */
1226 if (code == GT)
1227 cmp_opstr = "qbgt";
1228 else if (code == GE)
1229 cmp_opstr = "qbge";
1230 else
1231 gcc_unreachable ();
1233 if (is_near)
1234 bufi = snprintf (buf, sizeof (buf),
1235 "qbbs\t.+12, %%1, %d\n\t"
1236 "qbbs\t%%l3, %%2, %d\n\t" /* OP1_POS. */
1237 "%s\t%%l3, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1238 "qbbc\t.+8, %%2, %d\n\t" /* OP1_NEG. */
1239 "%s\t%%l3, %%2, %%1", /* OP1_NEG_OP2_NEG. */
1240 sign_bit_position (op1),
1241 sign_bit_position (op2),
1242 cmp_opstr,
1243 sign_bit_position (op2),
1244 cmp_opstr);
1245 else
1246 bufi = snprintf (buf, sizeof (buf),
1247 "qbbs\t.+12, %%1, %d\n\t"
1248 "qbbs\t.+20, %%2, %d\n\t" /* OP1_POS. */
1249 "%s\t.+16, %%2, %%1\n\t" /* OP1_POS_OP2_POS. */
1250 "qbbc\t.+16, %%2, %d\n\t" /* OP1_NEG. */
1251 "%s\t.+8, %%2, %%1\n\t" /* OP1_NEG_OP2_NEG. */
1252 "jmp\t.+8\n\t" /* jmp OUT. */
1253 "jmp\t%%%%label(%%l3)", /* BRANCH_TAKEN_LABEL. */
1254 sign_bit_position (op1),
1255 sign_bit_position (op2),
1256 cmp_opstr,
1257 sign_bit_position (op2),
1258 cmp_opstr);
1260 gcc_assert (bufi > 0);
1261 gcc_assert ((unsigned int) bufi < sizeof (buf));
1263 return buf;
1266 /* Output asm code for a signed-compare conditional branch.
1268 If IS_NEAR is true, then QBBx instructions may be used for reaching
1269 the destination label. Otherwise JMP is used, at the expense of
1270 increased code size. */
1271 const char *
1272 pru_output_signed_cbranch (rtx *operands, bool is_near)
1274 enum rtx_code code = GET_CODE (operands[0]);
1276 if (code == LT || code == LE)
1277 return pru_output_ltle_signed_cbranch (operands, is_near);
1278 else if (code == GT || code == GE)
1279 return pru_output_gtge_signed_cbranch (operands, is_near);
1280 else
1281 gcc_unreachable ();
1284 /* Optimized version of pru_output_signed_cbranch for constant second
1285 operand. */
1287 const char *
1288 pru_output_signed_cbranch_ubyteop2 (rtx *operands, bool is_near)
1290 static char buf[1024];
1291 enum rtx_code code = GET_CODE (operands[0]);
1292 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1293 const char *cmp_opstr;
1294 const char *rcmp_opstr;
1296 /* We must swap the operands because PRU requires OP1 to be the immediate. */
1297 code = swap_condition (code);
1299 /* Determine normal and reversed comparison operators for both positive
1300 operands. This enables us to go completely unsigned.
1302 NOTE: We cannot use the R print modifier because we convert signed
1303 comparison operators to unsigned ones. */
1304 switch (code)
1306 case LT: cmp_opstr = "qblt"; rcmp_opstr = "qbge"; break;
1307 case LE: cmp_opstr = "qble"; rcmp_opstr = "qbgt"; break;
1308 case GT: cmp_opstr = "qbgt"; rcmp_opstr = "qble"; break;
1309 case GE: cmp_opstr = "qbge"; rcmp_opstr = "qblt"; break;
1310 default: gcc_unreachable ();
1313 /* OP2 is a constant unsigned byte - utilize this info to generate
1314 optimized code. We can "remove half" of the op table above because
1315 we know that OP2.b31 = 0 (remember that 0 <= OP2 <= 255). */
1316 if (code == LT || code == LE)
1318 if (is_near)
1319 snprintf (buf, sizeof (buf),
1320 "qbbs\t.+8, %%1, %d\n\t"
1321 "%s\t%%l3, %%1, %%u2",
1322 regop_sign_bit_pos,
1323 cmp_opstr);
1324 else
1325 snprintf (buf, sizeof (buf),
1326 "qbbs\t.+12, %%1, %d\n\t"
1327 "%s\t.+8, %%1, %%u2\n\t"
1328 "jmp\t%%%%label(%%l3)",
1329 regop_sign_bit_pos,
1330 rcmp_opstr);
1332 else if (code == GT || code == GE)
1334 if (is_near)
1335 snprintf (buf, sizeof (buf),
1336 "qbbs\t%%l3, %%1, %d\n\t"
1337 "%s\t%%l3, %%1, %%u2",
1338 regop_sign_bit_pos,
1339 cmp_opstr);
1340 else
1341 snprintf (buf, sizeof (buf),
1342 "qbbs\t.+8, %%1, %d\n\t"
1343 "%s\t.+8, %%1, %%u2\n\t"
1344 "jmp\t%%%%label(%%l3)",
1345 regop_sign_bit_pos,
1346 rcmp_opstr);
1348 else
1349 gcc_unreachable ();
1351 return buf;
1354 /* Optimized version of pru_output_signed_cbranch_ubyteop2 for constant
1355 zero second operand. */
1357 const char *
1358 pru_output_signed_cbranch_zeroop2 (rtx *operands, bool is_near)
1360 static char buf[1024];
1361 enum rtx_code code = GET_CODE (operands[0]);
1362 int regop_sign_bit_pos = sign_bit_position (operands[1]);
1364 /* OP2 is a constant zero - utilize this info to simply check the
1365 OP1 sign bit when comparing for LT or GE. */
1366 if (code == LT)
1368 if (is_near)
1369 snprintf (buf, sizeof (buf),
1370 "qbbs\t%%l3, %%1, %d\n\t",
1371 regop_sign_bit_pos);
1372 else
1373 snprintf (buf, sizeof (buf),
1374 "qbbc\t.+8, %%1, %d\n\t"
1375 "jmp\t%%%%label(%%l3)",
1376 regop_sign_bit_pos);
1378 else if (code == GE)
1380 if (is_near)
1381 snprintf (buf, sizeof (buf),
1382 "qbbc\t%%l3, %%1, %d\n\t",
1383 regop_sign_bit_pos);
1384 else
1385 snprintf (buf, sizeof (buf),
1386 "qbbs\t.+8, %%1, %d\n\t"
1387 "jmp\t%%%%label(%%l3)",
1388 regop_sign_bit_pos);
1390 else
1391 gcc_unreachable ();
1393 return buf;
1396 /* Addressing Modes. */
1398 /* Return true if register REGNO is a valid base register.
1399 STRICT_P is true if REG_OK_STRICT is in effect. */
1401 bool
1402 pru_regno_ok_for_base_p (int regno, bool strict_p)
1404 if (!HARD_REGISTER_NUM_P (regno) && !strict_p)
1405 return true;
1407 /* The fake registers will be eliminated to either the stack or
1408 hard frame pointer, both of which are usually valid base registers.
1409 Reload deals with the cases where the eliminated form isn't valid. */
1410 return (GP_REG_P (regno)
1411 || regno == FRAME_POINTER_REGNUM
1412 || regno == ARG_POINTER_REGNUM);
1415 /* Return true if given xbbo constant OFFSET is valid. */
1416 static bool
1417 pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
1419 bool valid = UBYTE_INT (offset);
1421 /* Reload can split multi word accesses, so make sure we can address
1422 the second word in a DI. */
1423 if (valid && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode))
1424 valid = UBYTE_INT (offset + GET_MODE_SIZE (mode) - 1);
1426 return valid;
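/* Worked example: for a DImode access the last byte lives at
   offset + 7, so offset 252 is rejected (252 + 7 > 255) even though
   252 itself fits in an unsigned byte.  */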
1429 /* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
1430 base was not found in the pragma-filled pru_ctable. */
1432 pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
1434 unsigned int i;
1436 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1438 if (pru_ctable[i].valid && pru_ctable[i].base == caddr)
1439 return i;
1441 return -1;
1445 /* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
1446 and return the base CTABLE index if possible. */
1448 pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
1450 unsigned int i;
1452 for (i = 0; i < ARRAY_SIZE (pru_ctable); i++)
1454 if (pru_ctable[i].valid && IN_RANGE (caddr,
1455 pru_ctable[i].base,
1456 pru_ctable[i].base + 0xff))
1457 return i;
1459 return -1;
1463 /* Return the offset from some CTABLE base for this address. */
1465 pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
1467 int i;
1469 i = pru_get_ctable_base_index (caddr);
1470 gcc_assert (i >= 0);
1472 return caddr - pru_ctable[i].base;
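/* Hypothetical example: if pru_ctable[4].base were 0x48000000, then
   address 0x48000010 would map to CTABLE base index 4 with an
   unsigned-byte offset of 0x10.  */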
1475 /* Return true if the address expression formed by BASE + OFFSET is
1476 valid.
1478 Note that the following address is not handled here:
1479 base CTABLE constant base + UBYTE constant offset
1480 The constants will be folded. The ctable_addr_operand predicate will take
1481 care of the validation. The CTABLE base+offset split will happen during
1482 operand printing. */
1483 static bool
1484 pru_valid_addr_expr_p (machine_mode mode, rtx base, rtx offset, bool strict_p)
1486 if (!strict_p && GET_CODE (base) == SUBREG)
1487 base = SUBREG_REG (base);
1488 if (!strict_p && GET_CODE (offset) == SUBREG)
1489 offset = SUBREG_REG (offset);
1491 if (REG_P (base)
1492 && pru_regno_ok_for_base_p (REGNO (base), strict_p)
1493 && ((CONST_INT_P (offset)
1494 && pru_valid_const_ubyte_offset (mode, INTVAL (offset)))
1495 || (REG_P (offset)
1496 && pru_regno_ok_for_index_p (REGNO (offset), strict_p))))
1497 /* base register + register offset
1498 * OR base register + UBYTE constant offset. */
1499 return true;
1500 else if (REG_P (base)
1501 && pru_regno_ok_for_index_p (REGNO (base), strict_p)
1502 && ctable_base_operand (offset, VOIDmode))
1503 /* base CTABLE constant base + register offset
1504 * Note: GCC always puts the register as a first operand of PLUS. */
1505 return true;
1506 else
1507 return false;
1510 /* Return register number (either for r30 or r31) which maps to the
1511 corresponding symbol OP's name in the __regio_symbol address namespace.
1513 If no mapping can be established (i.e. symbol name is invalid), then
1514 return -1. */
1515 int pru_symref2ioregno (rtx op)
1517 if (!SYMBOL_REF_P (op))
1518 return -1;
1520 const char *name = XSTR (op, 0);
1521 if (!strcmp (name, "__R30"))
1522 return R30_REGNUM;
1523 else if (!strcmp (name, "__R31"))
1524 return R31_REGNUM;
1525 else
1526 return -1;
1529 /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. */
1530 static bool
1531 pru_addr_space_legitimate_address_p (machine_mode mode, rtx operand,
1532 bool strict_p, addr_space_t as,
1533 code_helper = ERROR_MARK)
1535 if (as == ADDR_SPACE_REGIO)
1537 /* Address space constraints for __regio_symbol have been checked in
1538 TARGET_INSERT_ATTRIBUTES, and some more checks will be done
1539 during RTL expansion of "mov<mode>". */
1540 return true;
1542 else if (as != ADDR_SPACE_GENERIC)
1544 gcc_unreachable ();
1547 switch (GET_CODE (operand))
1549 /* Direct. */
1550 case SYMBOL_REF:
1551 case LABEL_REF:
1552 case CONST:
1553 case CONST_WIDE_INT:
1554 return false;
1556 case CONST_INT:
1557 return ctable_addr_operand (operand, VOIDmode);
1559 /* Register indirect. */
1560 case REG:
1561 return pru_regno_ok_for_base_p (REGNO (operand), strict_p);
1563 /* Register indirect with displacement. */
1564 case PLUS:
1566 rtx op0 = XEXP (operand, 0);
1567 rtx op1 = XEXP (operand, 1);
1569 return pru_valid_addr_expr_p (mode, op0, op1, strict_p);
1572 default:
1573 break;
1575 return false;
1578 /* Output assembly language related definitions. */
1580 /* Implement TARGET_ASM_CONSTRUCTOR. */
1581 static void
1582 pru_elf_asm_constructor (rtx symbol, int priority)
1584 char buf[23];
1585 section *s;
1587 if (priority == DEFAULT_INIT_PRIORITY)
1588 snprintf (buf, sizeof (buf), ".init_array");
1589 else
1591 /* Priority is known to be in range [0, 65535], so 18 bytes would be
1592 enough; but the compiler might not know that. To avoid a
1593 -Wformat-truncation false positive, use a larger size. */
1594 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
1596 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1597 switch_to_section (s);
1598 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
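/* Example: a constructor with priority 101 is placed in section
   ".init_array.00101", while DEFAULT_INIT_PRIORITY uses plain
   ".init_array".  */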
1601 /* Implement TARGET_ASM_DESTRUCTOR. */
1602 static void
1603 pru_elf_asm_destructor (rtx symbol, int priority)
1605 char buf[23];
1606 section *s;
1608 if (priority == DEFAULT_INIT_PRIORITY)
1609 snprintf (buf, sizeof (buf), ".fini_array");
1610 else
1612 /* Priority is known to be in range [0, 65535], so 18 bytes would be
1613 enough; but the compiler might not know that. To avoid a
1614 -Wformat-truncation false positive, use a larger size. */
1615 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
1617 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
1618 switch_to_section (s);
1619 assemble_aligned_integer (INIT_ARRAY_ENTRY_BYTES, symbol);
1622 /* Map rtx_code to unsigned PRU branch op suffix. Callers must
1623 handle sign comparison themselves for signed operations. */
1624 static const char *
1625 pru_comparison_str (enum rtx_code cond)
1627 switch (cond)
1629 case NE: return "ne";
1630 case EQ: return "eq";
1631 case GEU: return "ge";
1632 case GTU: return "gt";
1633 case LEU: return "le";
1634 case LTU: return "lt";
1635 default: gcc_unreachable ();
1639 /* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
1640 the bit representation of X by "casting" it to CONST_INT. */
1642 static rtx
1643 pru_to_int_mode (rtx x)
1645 machine_mode mode = GET_MODE (x);
1647 return VOIDmode == mode
1649 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
1652 /* Translate from the machine description's notion
1653 of 8-bit consecutive registers to the PRU
1654 assembler syntax of REGWORD[.SUBREG]. */
1655 static const char *
1656 pru_asm_regname (rtx op)
1658 static char canon_reg_names[3][LAST_GP_REGNUM][8];
1659 int speci, regi;
1661 gcc_assert (REG_P (op));
1663 if (!canon_reg_names[0][0][0])
1665 for (regi = 0; regi < LAST_GP_REGNUM; regi++)
1666 for (speci = 0; speci < 3; speci++)
1668 const int sz = (speci == 0) ? 1 : ((speci == 1) ? 2 : 4);
1669 if ((regi + sz) > (32 * 4))
1670 continue; /* Invalid entry. */
1672 /* Construct the lookup table. */
1673 const char *suffix = "";
1675 switch ((sz << 8) | (regi % 4))
1677 case (1 << 8) | 0: suffix = ".b0"; break;
1678 case (1 << 8) | 1: suffix = ".b1"; break;
1679 case (1 << 8) | 2: suffix = ".b2"; break;
1680 case (1 << 8) | 3: suffix = ".b3"; break;
1681 case (2 << 8) | 0: suffix = ".w0"; break;
1682 case (2 << 8) | 1: suffix = ".w1"; break;
1683 case (2 << 8) | 2: suffix = ".w2"; break;
1684 case (4 << 8) | 0: suffix = ""; break;
1685 default:
1686 /* Invalid entry. */
1687 continue;
1689 sprintf (&canon_reg_names[speci][regi][0],
1690 "r%d%s", regi / 4, suffix);
1694 switch (GET_MODE_SIZE (GET_MODE (op)))
1696 case 1: speci = 0; break;
1697 case 2: speci = 1; break;
1698 case 4: speci = 2; break;
1699 case 8: speci = 2; break; /* Existing GCC test cases are not using %F. */
1700 default: gcc_unreachable ();
1702 regi = REGNO (op);
1703 gcc_assert (regi < LAST_GP_REGNUM);
1704 gcc_assert (canon_reg_names[speci][regi][0]);
1706 return &canon_reg_names[speci][regi][0];
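/* Illustration: hard register 4 prints as "r1" in SImode, "r1.w0" in
   HImode and "r1.b0" in QImode; hard register 5 in QImode prints as
   "r1.b1".  */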
1709 /* Print the operand OP to file stream FILE modified by LETTER.
1710 LETTER can be one of:
1712 b: prints the register byte start (used by LBBO/SBBO).
1713 B: prints 'c' or 'b' for CTABLE or REG base in a memory address.
1714 F: Full 32-bit register.
1715 H: Higher 16-bits of a const_int operand.
1716 L: Lower 16-bits of a const_int operand.
1717 N: prints next 32-bit register (upper 32bits of a 64bit REG couple).
1718 P: prints swapped condition.
1719 Q: prints swapped and reversed condition.
1720 R: prints reversed condition.
1721 S: print operand mode size (but do not print the operand itself).
1722 T: print exact_log2 () for const_int operands.
1723 u: print QI constant integer as unsigned. No transformation for regs.
1724 V: print exact_log2 () of negated const_int operands.
1725 w: Lower 32-bits of a const_int operand.
1726 W: Upper 32-bits of a const_int operand.
1728 static void
1729 pru_print_operand (FILE *file, rtx op, int letter)
1731 switch (letter)
1733 case 'S':
1734 fprintf (file, "%d", GET_MODE_SIZE (GET_MODE (op)));
1735 return;
1737 default:
1738 break;
1741 if (comparison_operator (op, VOIDmode))
1743 enum rtx_code cond = GET_CODE (op);
1744 gcc_assert (!pru_signed_cmp_operator (op, VOIDmode));
1746 switch (letter)
1748 case 0:
1749 fprintf (file, "%s", pru_comparison_str (cond));
1750 return;
1751 case 'P':
1752 fprintf (file, "%s", pru_comparison_str (swap_condition (cond)));
1753 return;
1754 case 'Q':
1755 cond = swap_condition (cond);
1756 /* Fall through. */
1757 case 'R':
1758 fprintf (file, "%s", pru_comparison_str (reverse_condition (cond)));
1759 return;
1763 switch (GET_CODE (op))
1765 case REG:
1766 if (letter == 0 || letter == 'u')
1768 fprintf (file, "%s", pru_asm_regname (op));
1769 return;
1771 else if (letter == 'b')
1773 if (REGNO (op) > LAST_NONIO_GP_REGNUM)
1775 output_operand_lossage ("I/O register operand for '%%%c'",
1776 letter);
1777 return;
1779 fprintf (file, "r%d.b%d", REGNO (op) / 4, REGNO (op) % 4);
1780 return;
1782 else if (letter == 'F' || letter == 'N')
1784 if (REGNO (op) > LAST_NONIO_GP_REGNUM - 1)
1786 output_operand_lossage ("I/O register operand for '%%%c'",
1787 letter);
1788 return;
1790 if (REGNO (op) % 4 != 0)
1792 output_operand_lossage ("non 32 bit register operand for '%%%c'",
1793 letter);
1794 return;
1796 fprintf (file, "r%d", REGNO (op) / 4 + (letter == 'N' ? 1 : 0));
1797 return;
1799 break;
1801 case CONST_INT:
1802 if (letter == 'H')
1804 HOST_WIDE_INT val = INTVAL (op);
1805 val = (val >> 16) & 0xFFFF;
1806 output_addr_const (file, gen_int_mode (val, SImode));
1807 return;
1809 else if (letter == 'L')
1811 HOST_WIDE_INT val = INTVAL (op);
1812 val &= 0xFFFF;
1813 output_addr_const (file, gen_int_mode (val, SImode));
1814 return;
1816 else if (letter == 'T')
1818 /* The predicate should have already validated the 1-high-bit
1819 requirement. Use CTZ here to deal with constant's sign
1820 extension. */
1821 HOST_WIDE_INT val = wi::ctz (INTVAL (op));
1822 if (val < 0 || val > 31)
1824 output_operand_lossage ("invalid operand for '%%%c'", letter);
1825 return;
1827 output_addr_const (file, gen_int_mode (val, SImode));
1828 return;
1830 else if (letter == 'V')
1832 HOST_WIDE_INT val = wi::ctz (~INTVAL (op));
1833 if (val < 0 || val > 31)
1835 output_operand_lossage ("invalid operand for '%%%c'", letter);
1836 return;
1838 output_addr_const (file, gen_int_mode (val, SImode));
1839 return;
1841 else if (letter == 'w')
1843 HOST_WIDE_INT val = INTVAL (op) & 0xffffffff;
1844 output_addr_const (file, gen_int_mode (val, SImode));
1845 return;
1847 else if (letter == 'W')
1849 HOST_WIDE_INT val = (INTVAL (op) >> 32) & 0xffffffff;
1850 output_addr_const (file, gen_int_mode (val, SImode));
1851 return;
1853 else if (letter == 'u')
1855 /* Work around GCC's representation of QI constants in sign-extended
1856 form, and the PRU assembler's insistence on unsigned constant
1857 integers. See the notes about O constraint. */
1858 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op) & 0xff);
1859 return;
1861 /* Else, fall through. */
1863 case CONST:
1864 case LABEL_REF:
1865 case SYMBOL_REF:
1866 if (letter == 0)
1868 output_addr_const (file, op);
1869 return;
1871 break;
1873 case CONST_FIXED:
1875 HOST_WIDE_INT ival = INTVAL (pru_to_int_mode (op));
1876 if (letter != 0)
1877 output_operand_lossage ("unsupported code '%c' for fixed-point:",
1878 letter);
1879 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
1880 return;
1882 break;
1884 case CONST_DOUBLE:
1885 if (letter == 0)
1887 long val;
1889 if (GET_MODE (op) != SFmode)
1891 output_operand_lossage ("double constants not supported");
1892 return;
1894 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), val);
1895 fprintf (file, "0x%lx", val);
1896 return;
1898 else if (letter == 'w' || letter == 'W')
1900 long t[2];
1901 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), t);
1902 fprintf (file, "0x%lx", t[letter == 'w' ? 0 : 1]);
1903 return;
1905 else
1907 output_operand_lossage ("invalid operand for '%%%c'", letter);
1908 return;
1910 break;
1912 case SUBREG:
1913 /* Subregs should not appear at such a late stage. */
1914 gcc_unreachable ();
1915 break;
1917 case MEM:
1918 if (letter == 0)
1920 output_address (VOIDmode, op);
1921 return;
1923 else if (letter == 'B')
1925 rtx base = XEXP (op, 0);
1926 if (GET_CODE (base) == PLUS)
1928 rtx op0 = XEXP (base, 0);
1929 rtx op1 = XEXP (base, 1);
1931 /* PLUS cannot have two constant operands, so the first one
1932 of them must be a REG, hence we must check for an
1933 exact base address. */
1934 if (ctable_base_operand (op1, VOIDmode))
1936 fprintf (file, "c");
1937 return;
1939 else if (REG_P (op0))
1941 fprintf (file, "b");
1942 return;
1944 else
1945 gcc_unreachable ();
1947 else if (REG_P (base))
1949 fprintf (file, "b");
1950 return;
1952 else if (ctable_addr_operand (base, VOIDmode))
1954 fprintf (file, "c");
1955 return;
1957 else
1958 gcc_unreachable ();
1960 break;
1962 case CODE_LABEL:
1963 if (letter == 0)
1965 output_addr_const (file, op);
1966 return;
1968 break;
1970 default:
1971 break;
1974 output_operand_lossage ("unsupported operand %s for code '%c'",
1975 GET_RTX_NAME (GET_CODE (op)), letter);
1978 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
1979 static void
1980 pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
1982 if (CONSTANT_ADDRESS_P (op) && text_segment_operand (op, VOIDmode))
1984 output_operand_lossage ("unexpected text address:");
1985 return;
1988 switch (GET_CODE (op))
1990 case CONST:
1991 case LABEL_REF:
1992 case CONST_WIDE_INT:
1993 case SYMBOL_REF:
1994 break;
1996 case CONST_INT:
1998 unsigned HOST_WIDE_INT caddr = INTVAL (op);
1999 int base = pru_get_ctable_base_index (caddr);
2000 int offs = pru_get_ctable_base_offset (caddr);
2001 if (base < 0)
2003 output_operand_lossage ("unsupported constant address:");
2004 return;
2006 fprintf (file, "%d, %d", base, offs);
2007 return;
2009 break;
2011 case PLUS:
2013 int base;
2014 rtx op0 = XEXP (op, 0);
2015 rtx op1 = XEXP (op, 1);
2017 if (REG_P (op0) && CONST_INT_P (op1)
2018 && pru_get_ctable_exact_base_index (INTVAL (op1)) >= 0)
2020 base = pru_get_ctable_exact_base_index (INTVAL (op1));
2021 fprintf (file, "%d, %s", base, pru_asm_regname (op0));
2022 return;
2024 else if (REG_P (op1) && CONST_INT_P (op0)
2025 && pru_get_ctable_exact_base_index (INTVAL (op0)) >= 0)
2027 /* Not a valid RTL. */
2028 gcc_unreachable ();
2030 else if (REG_P (op0) && CONSTANT_P (op1))
2032 fprintf (file, "%s, ", pru_asm_regname (op0));
2033 output_addr_const (file, op1);
2034 return;
2036 else if (REG_P (op1) && CONSTANT_P (op0))
2038 /* Not a valid RTL. */
2039 gcc_unreachable ();
2041 else if (REG_P (op1) && REG_P (op0))
2043 fprintf (file, "%s, %s", pru_asm_regname (op0),
2044 pru_asm_regname (op1));
2045 return;
2048 break;
2050 case REG:
2051 fprintf (file, "%s, 0", pru_asm_regname (op));
2052 return;
2054 case MEM:
2056 rtx base = XEXP (op, 0);
2057 pru_print_operand_address (file, mode, base);
2058 return;
2060 default:
2061 break;
2064 output_operand_lossage ("unsupported memory expression:");
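/* Summary of the operand-address forms printed above, with made-up register
   names and CTABLE indices purely for illustration:

     CONST_INT inside a CTABLE region     ->  "<base>, <offset>"  e.g. "2, 4"
     (plus REG exact-CTABLE-base const)   ->  "<base>, <reg>"     e.g. "2, r14"
     (plus REG const)                     ->  "<reg>, <offset>"   e.g. "r14, 16"
     (plus REG REG)                       ->  "<reg>, <reg>"      e.g. "r14, r15"
     plain REG                            ->  "<reg>, 0"          e.g. "r14, 0"  */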
2067 /* Implement TARGET_ASM_FUNCTION_PROLOGUE. */
2068 static void
2069 pru_asm_function_prologue (FILE *file)
2071 if (flag_verbose_asm || flag_debug_asm)
2072 pru_dump_frame_layout (file);
2075 /* Implement `TARGET_ASM_INTEGER'.
2076    Target hook for assembling integer objects.  The PRU version needs
2077    special handling for references to pmem.  Code copied from the AVR port.  */
2079 static bool
2080 pru_assemble_integer (rtx x, unsigned int size, int aligned_p)
2082 if (size == POINTER_SIZE / BITS_PER_UNIT
2083 && aligned_p
2084 && text_segment_operand (x, VOIDmode))
2086 fputs ("\t.4byte\t%pmem(", asm_out_file);
2087 output_addr_const (asm_out_file, x);
2088 fputs (")\n", asm_out_file);
2090 return true;
2092 else if (size == INIT_ARRAY_ENTRY_BYTES
2093 && aligned_p
2094 && text_segment_operand (x, VOIDmode))
2096 fputs ("\t.2byte\t%pmem(", asm_out_file);
2097 output_addr_const (asm_out_file, x);
2098 fputs (")\n", asm_out_file);
2100 return true;
2102 else
2104 return default_assemble_integer (x, size, aligned_p);
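/* For illustration, assuming a firmware file with a hypothetical handler
   symbol, a code-pointer initializer such as

     void handler (void);
     void (*const vec[1]) (void) = { handler };

   would have its table entry emitted by the hook above roughly as

     .4byte  %pmem(handler)

   so that the linker treats the value as a program-memory reference.  */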
2108 /* Implement TARGET_SECTION_TYPE_FLAGS. */
2110 static unsigned int
2111 pru_section_type_flags (tree decl, const char *name, int reloc)
2113 unsigned int flags = default_section_type_flags (decl, name, reloc);
2115 /* The .pru_irq_map section is not meant to be loaded into the target
2116 memory. Instead its contents are read by the host remoteproc loader.
2117    To prevent it from being marked as a loadable (allocated) section,
2118    the .pru_irq_map section is intercepted and marked as a debug section.  */
2119 if (!strcmp (name, ".pru_irq_map"))
2120 flags = SECTION_DEBUG | SECTION_RETAIN;
2122 return flags;
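/* A typical use from firmware source would be something like (the structure
   name and contents are hypothetical):

     struct my_irq_map_entry irq_map
       __attribute__((section (".pru_irq_map"), used)) = { ... };

   With the flags above the section ends up non-allocatable, so the host
   remoteproc loader can read it from the ELF file without it ever being
   loaded into PRU memory.  */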
2125 /* Implement TARGET_ASM_FILE_START. */
2127 static void
2128 pru_file_start (void)
2130 default_file_start ();
2132 /* The compiler will take care of placing %label, so there is no
2133 need to confuse users with this warning. */
2134 fprintf (asm_out_file, "\t.set no_warn_regname_label\n");
2137 /* Scan type TYP for pointer references to address space other than
2138    ADDR_SPACE_GENERIC.  Return true if such a reference is found.
2139 Much of this code was taken from the avr port. */
2141 static bool
2142 pru_nongeneric_pointer_addrspace (tree typ)
2144 while (ARRAY_TYPE == TREE_CODE (typ))
2145 typ = TREE_TYPE (typ);
2147 if (POINTER_TYPE_P (typ))
2149 addr_space_t as;
2150 tree target = TREE_TYPE (typ);
2152 /* Pointer to function: Test the function's return type. */
2153 if (FUNCTION_TYPE == TREE_CODE (target))
2154 return pru_nongeneric_pointer_addrspace (TREE_TYPE (target));
2156 /* "Ordinary" pointers... */
2158 while (TREE_CODE (target) == ARRAY_TYPE)
2159 target = TREE_TYPE (target);
2161 as = TYPE_ADDR_SPACE (target);
2163 if (!ADDR_SPACE_GENERIC_P (as))
2164 return true;
2166 /* Scan pointer's target type. */
2167 return pru_nongeneric_pointer_addrspace (target);
2170 return false;
2173 /* Implement `TARGET_INSERT_ATTRIBUTES'. For PRU it's used as a hook to
2174 provide better diagnostics for some invalid usages of the __regio_symbol
2175 address space.
2177    Anything that escapes the following checks is supposed to be caught
2178 during the "mov<mode>" pattern expansion. */
2180 static void
2181 pru_insert_attributes (tree node, tree *)
2184 /* Validate __regio_symbol variable declarations. */
2185 if (VAR_P (node))
2187 const char *name = DECL_NAME (node)
2188 ? IDENTIFIER_POINTER (DECL_NAME (node))
2189 : "<unknown>";
2190 tree typ = TREE_TYPE (node);
2191 addr_space_t as = TYPE_ADDR_SPACE (typ);
2193 if (as == ADDR_SPACE_GENERIC)
2194 return;
2196 if (AGGREGATE_TYPE_P (typ))
2198 error ("aggregate types are prohibited in "
2199 "%<__regio_symbol%> address space");
2200 /* Don't bother checking further.  The checks below would pile
2201    up meaningless errors and confuse the user.  */
2202 return;
2204 if (DECL_INITIAL (node) != NULL_TREE)
2205 error ("variables in %<__regio_symbol%> address space "
2206 "cannot have initial value");
2207 if (DECL_REGISTER (node))
2208 error ("variables in %<__regio_symbol%> address space "
2209 "cannot be declared %<register%>");
2210 if (!TYPE_VOLATILE (typ))
2211 error ("variables in %<__regio_symbol%> address space "
2212 "must be declared %<volatile%>");
2213 if (!DECL_EXTERNAL (node))
2214 error ("variables in %<__regio_symbol%> address space "
2215 "must be declared %<extern%>");
2216 if (TYPE_MODE (typ) != SImode)
2217 error ("only 32-bit access is supported "
2218 "for %<__regio_symbol%> address space");
2219 if (strcmp (name, "__R30") != 0 && strcmp (name, "__R31") != 0)
2220 error ("register name %<%s%> not recognized "
2221 "in %<__regio_symbol%> address space", name);
2224 tree typ = NULL_TREE;
2226 switch (TREE_CODE (node))
2228 case FUNCTION_DECL:
2229 typ = TREE_TYPE (TREE_TYPE (node));
2230 break;
2231 case TYPE_DECL:
2232 case RESULT_DECL:
2233 case VAR_DECL:
2234 case FIELD_DECL:
2235 case PARM_DECL:
2236 typ = TREE_TYPE (node);
2237 break;
2238 case POINTER_TYPE:
2239 typ = node;
2240 break;
2241 default:
2242 break;
2244 if (typ != NULL_TREE && pru_nongeneric_pointer_addrspace (typ))
2245 error ("pointers to %<__regio_symbol%> address space are prohibited");
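/* To recap the checks above, the only accepted declarations in the
   __regio_symbol address space look roughly like:

     extern volatile unsigned int __regio_symbol __R30;
     extern volatile unsigned int __regio_symbol __R31;

   (a sketch; the exact placement of the address space qualifier may vary).
   Initializers, missing volatile/extern, non-32-bit types, aggregates, other
   variable names, and pointers into the address space are all rejected.  */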
2248 /* Function argument related. */
2250 /* Return the number of bytes needed for storing an argument with
2251 the given MODE and TYPE. */
2252 static int
2253 pru_function_arg_size (machine_mode mode, const_tree type)
2255 HOST_WIDE_INT param_size;
2257 if (mode == BLKmode)
2258 param_size = int_size_in_bytes (type);
2259 else
2260 param_size = GET_MODE_SIZE (mode);
2262 /* Convert to words (round up). */
2263 param_size = (UNITS_PER_WORD - 1 + param_size) / UNITS_PER_WORD;
2264 gcc_assert (param_size >= 0);
2266 return param_size;
2269 /* Check whether an argument of the given size must be
2270    passed/returned in a register.
2272 Reference:
2273 https://e2e.ti.com/support/development_tools/compiler/f/343/p/650176/2393029
2275    Arguments other than 8/16/24/32/64 bits are passed on the stack.  */
2276 static bool
2277 pru_arg_in_reg_bysize (size_t sz)
2279 return sz == 1 || sz == 2 || sz == 3 || sz == 4 || sz == 8;
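/* For example, a 1-byte char, a 4-byte int, a 3-byte struct or an 8-byte
   long long is passed in registers, while a 6-byte or 12-byte struct goes
   on the stack.  */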
2282 /* Helper function to get the starting storage HW register for an argument,
2283    or -1 if it must be passed on the stack.  The cum_v state is not changed.  */
2284 static int
2285 pru_function_arg_regi (cumulative_args_t cum_v,
2286 machine_mode mode, const_tree type,
2287 bool named)
2289 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2290 size_t argsize = pru_function_arg_size (mode, type);
2291 size_t i, bi;
2292 int regi = -1;
2294 if (!pru_arg_in_reg_bysize (argsize))
2295 return -1;
2297 if (!named)
2298 return -1;
2300 /* Find the first available slot that fits. Yes, that's the PRU ABI. */
2301 for (i = 0; regi < 0 && i < ARRAY_SIZE (cum->regs_used); i++)
2303 /* VLAs and vector types are not defined in the PRU ABI. Let's
2304 handle them the same as their same-sized counterparts. This way
2305    we do not need to treat BLKmode differently, and only need to check
2306 the size. */
2307 gcc_assert (argsize == 1 || argsize == 2 || argsize == 3
2308 || argsize == 4 || argsize == 8);
2310 /* Ensure SI and DI arguments are stored in full registers only. */
2311 if ((argsize >= 4) && (i % 4) != 0)
2312 continue;
2314 /* Structures with size 24 bits are passed starting at a full
2315 register boundary. */
2316 if (argsize == 3 && (i % 4) != 0)
2317 continue;
2319 /* rX.w0/w1/w2 are OK. But avoid spreading the second byte
2320 into a different full register. */
2321 if (argsize == 2 && (i % 4) == 3)
2322 continue;
2324 for (bi = 0;
2325 bi < argsize && (bi + i) < ARRAY_SIZE (cum->regs_used);
2326 bi++)
2328 if (cum->regs_used[bi + i])
2329 break;
2331 if (bi == argsize)
2332 regi = FIRST_ARG_REGNUM + i;
2335 return regi;
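/* As an illustration of the rules above: after a 1-byte argument occupies
   the first byte slot, a subsequent 4-byte argument cannot use the remaining
   bytes of that register; the (i % 4) checks make it start at the next
   full-register boundary, leaving the intermediate byte slots free for later
   small arguments (the search always starts from the first available slot).  */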
2338 /* Mark in CUM_V that a function argument will occupy the HW register slot
2339    starting at REGI.  The number of consecutive 8-bit HW registers marked as
2340    occupied depends on the MODE and TYPE of the argument.  */
2341 static void
2342 pru_function_arg_regi_mark_slot (int regi,
2343 cumulative_args_t cum_v,
2344 machine_mode mode, const_tree type,
2345 bool named)
2347 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2348 HOST_WIDE_INT param_size = pru_function_arg_size (mode, type);
2350 gcc_assert (named);
2352 /* Mark all byte sub-registers occupied by argument as used. */
2353 while (param_size--)
2355 gcc_assert (regi >= FIRST_ARG_REGNUM && regi <= LAST_ARG_REGNUM);
2356 gcc_assert (!cum->regs_used[regi - FIRST_ARG_REGNUM]);
2357 cum->regs_used[regi - FIRST_ARG_REGNUM] = true;
2358 regi++;
2362 /* Define where to put the arguments to a function. Value is zero to
2363 push the argument on the stack, or a hard register in which to
2364 store the argument.
2366 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2367 the preceding args and about the function being called.
2368 ARG is a description of the argument. */
2370 static rtx
2371 pru_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
2373 rtx return_rtx = NULL_RTX;
2374 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2376 if (regi >= 0)
2377 return_rtx = gen_rtx_REG (arg.mode, regi);
2379 return return_rtx;
2382 /* Implement TARGET_ARG_PARTIAL_BYTES. PRU never splits any arguments
2383 between registers and memory, so we can return 0. */
2385 static int
2386 pru_arg_partial_bytes (cumulative_args_t, const function_arg_info &)
2388 return 0;
2391 /* Update the data in CUM to advance over argument ARG. */
2393 static void
2394 pru_function_arg_advance (cumulative_args_t cum_v,
2395 const function_arg_info &arg)
2397 int regi = pru_function_arg_regi (cum_v, arg.mode, arg.type, arg.named);
2399 if (regi >= 0)
2400 pru_function_arg_regi_mark_slot (regi, cum_v, arg.mode,
2401 arg.type, arg.named);
2404 /* Implement TARGET_FUNCTION_VALUE. */
2405 static rtx
2406 pru_function_value (const_tree ret_type, const_tree, bool)
2408 return gen_rtx_REG (TYPE_MODE (ret_type), FIRST_RETVAL_REGNUM);
2411 /* Implement TARGET_LIBCALL_VALUE. */
2412 static rtx
2413 pru_libcall_value (machine_mode mode, const_rtx)
2415 return gen_rtx_REG (mode, FIRST_RETVAL_REGNUM);
2418 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
2419 static bool
2420 pru_function_value_regno_p (const unsigned int regno)
2422 return regno == FIRST_RETVAL_REGNUM;
2425 /* Implement TARGET_RETURN_IN_MEMORY. */
2426 bool
2427 pru_return_in_memory (const_tree type, const_tree)
2429 bool in_memory = (!pru_arg_in_reg_bysize (int_size_in_bytes (type))
2430 || int_size_in_bytes (type) == -1);
2432 return in_memory;
2435 /* Implement TARGET_CAN_USE_DOLOOP_P. */
2437 static bool
2438 pru_can_use_doloop_p (const widest_int &, const widest_int &iterations_max,
2439 unsigned int loop_depth, bool)
2441 /* Considering limitations in the hardware, only use doloop
2442 for innermost loops which must be entered from the top. */
2443 if (loop_depth > 1)
2444 return false;
2445 /* The PRU's internal loop counter is 16 bits wide.  Remember that
2446    ITERATIONS_MAX holds the maximum number of loop latch executions, while
2447    the PRU LOOP instruction needs the count of loop body executions.  */
2448 if (iterations_max == 0 || wi::geu_p (iterations_max, 0xffff))
2449 return false;
2451 return true;
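/* In other words, only an innermost loop whose body is known to execute at
   most 0xffff times may use the LOOP instruction; nested or potentially
   longer-running loops fall back to ordinary compare-and-branch code.  */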
2454 /* Return NULL if INSN is valid within a low-overhead loop.
2455    Otherwise return a string describing why doloop cannot be applied.  */
2457 static const char *
2458 pru_invalid_within_doloop (const rtx_insn *insn)
2460 if (CALL_P (insn))
2461 return "Function call in the loop.";
2463 if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return)
2464 return "Return from a call instruction in the loop.";
2466 if (NONDEBUG_INSN_P (insn)
2467 && INSN_CODE (insn) < 0
2468 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
2469 || asm_noperands (PATTERN (insn)) >= 0))
2470 return "Loop contains asm statement.";
2472 return NULL;
2476 /* Figure out where to put LABEL, which is the label for a repeat loop.
2477 The loop ends just before LAST_INSN. If SHARED, insns other than the
2478 "repeat" might use LABEL to jump to the loop's continuation point.
2480 Return the last instruction in the adjusted loop. */
2482 static rtx_insn *
2483 pru_insert_loop_label_last (rtx_insn *last_insn, rtx_code_label *label,
2484 bool shared)
2486 rtx_insn *next, *prev;
2487 int count = 0, code, icode;
2489 if (dump_file)
2490 fprintf (dump_file, "considering end of repeat loop at insn %d\n",
2491 INSN_UID (last_insn));
2493 /* Set PREV to the last insn in the loop. */
2494 prev = PREV_INSN (last_insn);
2496 /* Set NEXT to the next insn after the loop label. */
2497 next = last_insn;
2498 if (!shared)
2499 while (prev != 0)
2501 code = GET_CODE (prev);
2502 if (code == CALL_INSN || code == CODE_LABEL || code == BARRIER)
2503 break;
2505 if (INSN_P (prev))
2507 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2508 prev = as_a <rtx_insn *> (XVECEXP (PATTERN (prev), 0, 1));
2510 /* Other insns that should not appear among the last two opcodes.  */
2511 icode = recog_memoized (prev);
2512 if (icode < 0
2513 || icode == CODE_FOR_pruloophi
2514 || icode == CODE_FOR_pruloopsi)
2515 break;
2517 count++;
2518 next = prev;
2519 if (dump_file)
2520 print_rtl_single (dump_file, next);
2521 if (count == 2)
2522 break;
2524 prev = PREV_INSN (prev);
2527 /* Insert the nops. */
2528 if (dump_file && count < 2)
2529 fprintf (dump_file, "Adding %d nop%s inside loop\n\n",
2530 2 - count, count == 1 ? "" : "s");
2532 for (; count < 2; count++)
2533 emit_insn_before (gen_nop (), last_insn);
2535 /* Insert the label. */
2536 emit_label_before (label, last_insn);
2538 return last_insn;
2541 /* If IS_END is false, expand a canonical doloop_begin RTL into the
2542 PRU-specific doloop_begin_internal. Otherwise expand doloop_end to
2543 doloop_end_internal. */
2544 void
2545 pru_emit_doloop (rtx *operands, int is_end)
2547 rtx tag;
2549 if (cfun->machine->doloop_tags == 0
2550 || cfun->machine->doloop_tag_from_end == is_end)
2552 cfun->machine->doloop_tags++;
2553 cfun->machine->doloop_tag_from_end = is_end;
2556 tag = GEN_INT (cfun->machine->doloop_tags - 1);
2557 machine_mode opmode = GET_MODE (operands[0]);
2558 gcc_assert (opmode == HImode || opmode == SImode);
2560 if (is_end)
2561 emit_jump_insn (gen_doloop_end_internal (opmode, operands[0],
2562 operands[1], tag));
2563 else
2564 emit_insn (gen_doloop_begin_internal (opmode, operands[0],
2565 operands[0], tag));
2569 /* Code for converting doloop_begins and doloop_ends into valid
2570 PRU instructions. Idea and code snippets borrowed from mep port.
2572 A doloop_begin is just a placeholder:
2574 $count = unspec ($count)
2576 where $count is initially the number of iterations.
2577 doloop_end has the form:
2579 if (--$count == 0) goto label
2581 The counter variable is private to the doloop insns, nothing else
2582 relies on its value.
2584 There are three cases, in decreasing order of preference:
2586 1. A loop has exactly one doloop_begin and one doloop_end.
2587 The doloop_end branches to the first instruction after
2588 the doloop_begin.
2590 In this case we can replace the doloop_begin with a LOOP
2591 instruction and remove the doloop_end. I.e.:
2593 $count1 = unspec ($count1)
2594 label:
2596 if (--$count2 != 0) goto label
2598 becomes:
2600 LOOP end_label,$count1
2601 label:
2603 end_label:
2604 # end loop
2606 2. As for (1), except there are several doloop_ends. One of them
2607 (call it X) falls through to a label L. All the others fall
2608 through to branches to L.
2610 In this case, we remove X and replace the other doloop_ends
2611 with branches to the LOOP label. For example:
2613 $count1 = unspec ($count1)
2614 label:
2616 if (--$count1 != 0) goto label
2617 end_label:
2619 if (--$count2 != 0) goto label
2620 goto end_label
2622 becomes:
2624 LOOP end_label,$count1
2625 label:
2627 end_label:
2628 # end repeat
2630 goto end_label
2632 3. The fallback case. Replace doloop_begins with:
2634 $count = $count
2636 Replace doloop_ends with the equivalent of:
2638 $count = $count - 1
2639 if ($count != 0) goto loop_label
2643 /* A structure describing one doloop_begin. */
2644 struct pru_doloop_begin {
2645 /* The next doloop_begin with the same tag. */
2646 struct pru_doloop_begin *next;
2648 /* The instruction itself. */
2649 rtx_insn *insn;
2651 /* The initial counter value. */
2652 rtx loop_count;
2654 /* The counter register. */
2655 rtx counter;
2658 /* A structure describing a doloop_end. */
2659 struct pru_doloop_end {
2660 /* The next doloop_end with the same loop tag. */
2661 struct pru_doloop_end *next;
2663 /* The instruction itself. */
2664 rtx_insn *insn;
2666 /* The first instruction after INSN when the branch isn't taken. */
2667 rtx_insn *fallthrough;
2669 /* The location of the counter value. Since doloop_end_internal is a
2670 jump instruction, it has to allow the counter to be stored anywhere
2671 (any non-fixed register). */
2672 rtx counter;
2674 /* The target label (the place where the insn branches when the counter
2675 isn't zero). */
2676 rtx label;
2678 /* A scratch register. Only available when COUNTER isn't stored
2679 in a general register. */
2680 rtx scratch;
2684 /* One do-while loop. */
2685 struct pru_doloop {
2686 /* All the doloop_begins for this loop (in no particular order). */
2687 struct pru_doloop_begin *begin;
2689 /* All the doloop_ends. When there is more than one, arrange things
2690 so that the first one is the most likely to be X in case (2) above. */
2691 struct pru_doloop_end *end;
2695 /* Return true if LOOP can be converted to use the PRU LOOP instruction
2696 (that is, if it matches cases (1) or (2) above). */
2698 static bool
2699 pru_repeat_loop_p (struct pru_doloop *loop)
2701 struct pru_doloop_end *end;
2702 rtx_insn *fallthrough;
2704 /* There must be exactly one doloop_begin and at least one doloop_end. */
2705 if (loop->begin == 0 || loop->end == 0 || loop->begin->next != 0)
2706 return false;
2708 /* The first doloop_end (X) must branch back to the insn after
2709 the doloop_begin. */
2710 if (prev_real_insn (as_a<rtx_insn *> (loop->end->label)) != loop->begin->insn)
2711 return false;
2713 /* Check that the first doloop_end (X) can actually reach the
2714    doloop_begin with a U8_PCREL relocation for the LOOP instruction.  */
2715 if (get_attr_length (loop->end->insn) != 4)
2716 return false;
2718 /* All the other doloop_ends must branch to the same place as X.
2719 When the branch isn't taken, they must jump to the instruction
2720 after X. */
2721 fallthrough = loop->end->fallthrough;
2722 for (end = loop->end->next; end != 0; end = end->next)
2723 if (end->label != loop->end->label
2724 || !simplejump_p (end->fallthrough)
2725 || fallthrough
2726 != next_real_insn (JUMP_LABEL_AS_INSN (end->fallthrough)))
2727 return false;
2729 return true;
2733 /* The main repeat reorg function. See comment above for details. */
2735 static void
2736 pru_reorg_loop (rtx_insn *insns)
2738 rtx_insn *insn;
2739 struct pru_doloop *loops, *loop;
2740 struct pru_doloop_begin *begin;
2741 struct pru_doloop_end *end;
2742 size_t tmpsz;
2744 /* Quick exit if we haven't created any loops. */
2745 if (cfun->machine->doloop_tags == 0)
2746 return;
2748 /* Create an array of pru_doloop structures. */
2749 tmpsz = sizeof (loops[0]) * cfun->machine->doloop_tags;
2750 loops = (struct pru_doloop *) alloca (tmpsz);
2751 memset (loops, 0, sizeof (loops[0]) * cfun->machine->doloop_tags);
2753 /* Search the function for do-while insns and group them by loop tag. */
2754 for (insn = insns; insn; insn = NEXT_INSN (insn))
2755 if (INSN_P (insn))
2756 switch (recog_memoized (insn))
2758 case CODE_FOR_doloop_begin_internalhi:
2759 case CODE_FOR_doloop_begin_internalsi:
2760 insn_extract (insn);
2761 loop = &loops[INTVAL (recog_data.operand[2])];
2763 tmpsz = sizeof (struct pru_doloop_begin);
2764 begin = (struct pru_doloop_begin *) alloca (tmpsz);
2765 begin->next = loop->begin;
2766 begin->insn = insn;
2767 begin->loop_count = recog_data.operand[1];
2768 begin->counter = recog_data.operand[0];
2770 loop->begin = begin;
2771 break;
2773 case CODE_FOR_doloop_end_internalhi:
2774 case CODE_FOR_doloop_end_internalsi:
2775 insn_extract (insn);
2776 loop = &loops[INTVAL (recog_data.operand[2])];
2778 tmpsz = sizeof (struct pru_doloop_end);
2779 end = (struct pru_doloop_end *) alloca (tmpsz);
2780 end->insn = insn;
2781 end->fallthrough = next_real_insn (insn);
2782 end->counter = recog_data.operand[0];
2783 end->label = recog_data.operand[1];
2784 end->scratch = recog_data.operand[3];
2786 /* If this insn falls through to an unconditional jump,
2787 give it a lower priority than the others. */
2788 if (loop->end != 0 && simplejump_p (end->fallthrough))
2790 end->next = loop->end->next;
2791 loop->end->next = end;
2793 else
2795 end->next = loop->end;
2796 loop->end = end;
2798 break;
2801 /* Convert the insns for each loop in turn. */
2802 for (loop = loops; loop < loops + cfun->machine->doloop_tags; loop++)
2803 if (pru_repeat_loop_p (loop))
2805 /* Case (1) or (2). */
2806 rtx_code_label *repeat_label;
2807 rtx label_ref;
2808 rtx loop_rtx;
2810 /* Create a new label for the repeat insn. */
2811 repeat_label = gen_label_rtx ();
2813 /* Replace the doloop_begin with a repeat.  We get rid
2814    of the iteration register because the LOOP instruction
2815    will utilize the PRU core's internal LOOP register.  */
2816 label_ref = gen_rtx_LABEL_REF (VOIDmode, repeat_label);
2817 machine_mode loop_mode = GET_MODE (loop->begin->loop_count);
2818 if (loop_mode == VOIDmode)
2820 gcc_assert (CONST_INT_P (loop->begin->loop_count));
2821 gcc_assert (UBYTE_INT (INTVAL (loop->begin->loop_count)));
2822 loop_mode = SImode;
2824 gcc_assert (loop_mode == HImode || loop_mode == SImode);
2825 loop_rtx = gen_pruloop (loop_mode, loop->begin->loop_count, label_ref);
2826 emit_insn_before (loop_rtx, loop->begin->insn);
2828 delete_insn (loop->begin->insn);
2830 /* Insert the repeat label before the first doloop_end.
2831    Fill the gap with nops if the LOOP insn is fewer than 2
2832    instructions away from loop->end.  */
2833 pru_insert_loop_label_last (loop->end->insn, repeat_label,
2834 loop->end->next != 0);
2836 /* Emit a pruloop_end (to improve the readability of the output). */
2837 emit_insn_before (gen_pruloop_end (), loop->end->insn);
2839 /* HACK: TODO: This is usually not needed, but is required for
2840 a few rare cases where a JUMP that breaks the loop
2841 references the LOOP_END address. In other words, since
2842 we're missing a real "loop_end" instruction, a loop "break"
2843 may accidentally reference the loop end itself, and thus
2844    continue the loop.  */
2845 for (insn = NEXT_INSN (loop->end->insn);
2846 insn != next_real_insn (loop->end->insn);
2847 insn = NEXT_INSN (insn))
2849 if (LABEL_P (insn) && LABEL_NUSES (insn) > 0)
2850 emit_insn_before (gen_nop_loop_guard (), loop->end->insn);
2853 /* Delete the first doloop_end. */
2854 delete_insn (loop->end->insn);
2856 /* Replace the others with branches to REPEAT_LABEL. */
2857 for (end = loop->end->next; end != 0; end = end->next)
2859 rtx_insn *newjmp;
2860 newjmp = emit_jump_insn_before (gen_jump (repeat_label), end->insn);
2861 JUMP_LABEL (newjmp) = repeat_label;
2862 delete_insn (end->insn);
2863 delete_insn (end->fallthrough);
2866 else
2868 /* Case (3).  First replace all the doloop_begins with moves that set
2869    the HW register used for the loop counter.  */
2870 for (begin = loop->begin; begin != 0; begin = begin->next)
2872 insn = gen_move_insn (copy_rtx (begin->counter),
2873 copy_rtx (begin->loop_count));
2874 emit_insn_before (insn, begin->insn);
2875 delete_insn (begin->insn);
2878 /* Replace all the doloop_ends with decrement-and-branch sequences. */
2879 for (end = loop->end; end != 0; end = end->next)
2881 rtx reg;
2883 start_sequence ();
2885 /* Load the counter value into a general register. */
2886 reg = end->counter;
2887 if (!REG_P (reg) || REGNO (reg) > LAST_NONIO_GP_REGNUM)
2889 reg = end->scratch;
2890 emit_move_insn (copy_rtx (reg), copy_rtx (end->counter));
2893 /* Decrement the counter. */
2894 emit_insn (gen_add3_insn (copy_rtx (reg), copy_rtx (reg),
2895 constm1_rtx));
2897 /* Copy it back to its original location. */
2898 if (reg != end->counter)
2899 emit_move_insn (copy_rtx (end->counter), copy_rtx (reg));
2901 /* Jump back to the start label. */
2902 insn = emit_jump_insn (gen_cbranchsi4 (gen_rtx_NE (VOIDmode, reg,
2903 const0_rtx),
2904 reg,
2905 const0_rtx,
2906 end->label));
2908 JUMP_LABEL (insn) = end->label;
2909 LABEL_NUSES (end->label)++;
2911 /* Emit the whole sequence before the doloop_end. */
2912 insn = get_insns ();
2913 end_sequence ();
2914 emit_insn_before (insn, end->insn);
2916 /* Delete the doloop_end. */
2917 delete_insn (end->insn);
2922 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
2923 static void
2924 pru_reorg (void)
2926 rtx_insn *insns = get_insns ();
2928 compute_bb_for_insn ();
2929 df_analyze ();
2931 /* Correct insn lengths are needed to decide whether a LOOP instruction
2932    can be emitted, given the U8_PCREL limitations.  */
2933 shorten_branches (get_insns ());
2935 /* The generic reorg_loops () is not suitable for PRU because
2936    it doesn't handle doloop_begin/end tying.  Also, we need our
2937    doloop_begin emitted before reload, since it is difficult to coalesce
2938    UBYTE constant initial loop values into the LOOP insn during the
2939    machine reorg phase.  */
2940 pru_reorg_loop (insns);
2942 df_finish_pass (false);
2945 /* Enumerate all PRU-specific builtins. */
2946 enum pru_builtin
2948 PRU_BUILTIN_DELAY_CYCLES,
2949 PRU_BUILTIN_HALT,
2950 PRU_BUILTIN_LMBD,
2951 PRU_BUILTIN_max
2954 static GTY(()) tree pru_builtins [(int) PRU_BUILTIN_max];
2956 /* Implement TARGET_INIT_BUILTINS. */
2958 static void
2959 pru_init_builtins (void)
2961 tree void_ftype_longlong
2962 = build_function_type_list (void_type_node,
2963 long_long_integer_type_node,
2964 NULL);
2965 tree uint_ftype_uint_uint
2966 = build_function_type_list (unsigned_type_node,
2967 unsigned_type_node,
2968 unsigned_type_node,
2969 NULL);
2971 tree void_ftype_void
2972 = build_function_type_list (void_type_node,
2973 void_type_node,
2974 NULL);
2976 pru_builtins[PRU_BUILTIN_DELAY_CYCLES]
2977 = add_builtin_function ("__delay_cycles", void_ftype_longlong,
2978 PRU_BUILTIN_DELAY_CYCLES, BUILT_IN_MD, NULL,
2979 NULL_TREE);
2981 pru_builtins[PRU_BUILTIN_HALT]
2982 = add_builtin_function ("__halt", void_ftype_void,
2983 PRU_BUILTIN_HALT, BUILT_IN_MD, NULL,
2984 NULL_TREE);
2986 pru_builtins[PRU_BUILTIN_LMBD]
2987 = add_builtin_function ("__lmbd", uint_ftype_uint_uint,
2988 PRU_BUILTIN_LMBD, BUILT_IN_MD, NULL,
2989 NULL_TREE);
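/* A rough usage sketch of the builtins registered above (argument values
   are made up, and the LMBD description follows the PRU "left-most bit
   detect" instruction):

     __delay_cycles (200);          // busy-wait for exactly 200 PRU cycles;
                                    // the argument must be a compile-time constant
     __halt ();                     // stop the PRU core via the HALT instruction
     unsigned pos = __lmbd (v, 1);  // position of the left-most set bit in v  */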
2992 /* Implement TARGET_BUILTIN_DECL. */
2994 static tree
2995 pru_builtin_decl (unsigned code, bool)
2997 switch (code)
2999 case PRU_BUILTIN_DELAY_CYCLES:
3000 case PRU_BUILTIN_HALT:
3001 case PRU_BUILTIN_LMBD:
3002 return pru_builtins[code];
3003 default:
3004 return error_mark_node;
3008 /* Emit a sequence of one or more delay_cycles_X insns, in order to generate
3009 code that delays exactly ARG cycles. */
3011 static rtx
3012 pru_expand_delay_cycles (rtx arg)
3014 HOST_WIDE_INT c, n;
3016 if (GET_CODE (arg) != CONST_INT)
3018 error ("%<__delay_cycles%> only takes constant arguments");
3019 return NULL_RTX;
3022 c = INTVAL (arg);
3024 gcc_assert (HOST_BITS_PER_WIDE_INT > 32);
3025 if (c < 0)
3027 error ("%<__delay_cycles%> only takes non-negative cycle counts");
3028 return NULL_RTX;
3031 emit_insn (gen_delay_cycles_start (arg));
3033 /* For 32-bit loops, the delay is 2 + 2x cycles.  */
3034 if (c > 2 * 0xffff + 1)
3036 n = (c - 2) / 2;
3037 c -= (n * 2) + 2;
3038 if ((unsigned long long) n > 0xffffffffULL)
3040 error ("%<__delay_cycles%> is limited to 32-bit loop counts");
3041 return NULL_RTX;
3043 emit_insn (gen_delay_cycles_2x_plus2_si (GEN_INT (n)));
3046 /* For 16-bit loops, the delay is 1 + 2x cycles.  */
3047 if (c > 2)
3049 n = (c - 1) / 2;
3050 c -= (n * 2) + 1;
3052 emit_insn (gen_delay_cycles_2x_plus1_hi (GEN_INT (n)));
3055 while (c > 0)
3057 emit_insn (gen_delay_cycles_1 ());
3058 c -= 1;
3061 emit_insn (gen_delay_cycles_end (arg));
3063 return NULL_RTX;
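/* A small worked example of the decomposition above: a request for 10 cycles
   is too short for the 32-bit loop, so the 16-bit loop takes
   n = (10 - 1) / 2 = 4 iterations, consuming 1 + 2*4 = 9 cycles, and the
   remaining single cycle is emitted as one delay_cycles_1 insn.  */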
3067 /* Implement TARGET_EXPAND_BUILTIN. Expand an expression EXP that calls
3068 a built-in function, with result going to TARGET if that's convenient
3069 (and in mode MODE if that's convenient).
3070 SUBTARGET may be used as the target for computing one of EXP's operands.
3071 IGNORE is nonzero if the value is to be ignored. */
3073 static rtx
3074 pru_expand_builtin (tree exp, rtx target, rtx, machine_mode mode, int)
3076 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3077 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
3079 switch (fcode)
3081 case PRU_BUILTIN_DELAY_CYCLES:
3083 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3084 return pru_expand_delay_cycles (arg1);
3086 break;
3087 case PRU_BUILTIN_HALT:
3089 emit_insn (gen_pru_halt ());
3090 return NULL_RTX;
3092 break;
3093 case PRU_BUILTIN_LMBD:
3095 rtx arg1 = expand_normal (CALL_EXPR_ARG (exp, 0));
3096 rtx arg2 = expand_normal (CALL_EXPR_ARG (exp, 1));
3098 if (target == NULL_RTX || GET_MODE (target) != mode)
3100 target = gen_reg_rtx (mode);
3103 emit_insn (gen_pru_lmbd (mode, target, arg1, arg2));
3104 return target;
3106 break;
3107 default:
3108 internal_error ("bad builtin code");
3111 return NULL_RTX;
3114 /* Remember the last target of pru_set_current_function. */
3115 static GTY(()) tree pru_previous_fndecl;
3117 /* Establish appropriate back-end context for processing the function
3118 FNDECL. The argument might be NULL to indicate processing at top
3119 level, outside of any function scope. */
3120 static void
3121 pru_set_current_function (tree fndecl)
3123 tree old_tree = (pru_previous_fndecl
3124 ? DECL_FUNCTION_SPECIFIC_TARGET (pru_previous_fndecl)
3125 : NULL_TREE);
3127 tree new_tree = (fndecl
3128 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3129 : NULL_TREE);
3131 if (fndecl && fndecl != pru_previous_fndecl)
3133 pru_previous_fndecl = fndecl;
3134 if (old_tree == new_tree)
3137 else if (new_tree)
3139 cl_target_option_restore (&global_options, &global_options_set,
3140 TREE_TARGET_OPTION (new_tree));
3141 target_reinit ();
3144 else if (old_tree)
3146 struct cl_target_option *def
3147 = TREE_TARGET_OPTION (target_option_current_node);
3149 cl_target_option_restore (&global_options, &global_options_set, def);
3150 target_reinit ();
3155 /* Implement TARGET_UNWIND_WORD_MODE.
3157 Since PRU is really a 32-bit CPU, the default word_mode is not suitable. */
3158 static scalar_int_mode
3159 pru_unwind_word_mode (void)
3161 return SImode;
3165 /* Initialize the GCC target structure. */
3166 #undef TARGET_ASM_FUNCTION_PROLOGUE
3167 #define TARGET_ASM_FUNCTION_PROLOGUE pru_asm_function_prologue
3168 #undef TARGET_ASM_INTEGER
3169 #define TARGET_ASM_INTEGER pru_assemble_integer
3170 #undef TARGET_SECTION_TYPE_FLAGS
3171 #define TARGET_SECTION_TYPE_FLAGS pru_section_type_flags
3173 #undef TARGET_ASM_FILE_START
3174 #define TARGET_ASM_FILE_START pru_file_start
3176 #undef TARGET_INSERT_ATTRIBUTES
3177 #define TARGET_INSERT_ATTRIBUTES pru_insert_attributes
3179 #undef TARGET_INIT_BUILTINS
3180 #define TARGET_INIT_BUILTINS pru_init_builtins
3181 #undef TARGET_EXPAND_BUILTIN
3182 #define TARGET_EXPAND_BUILTIN pru_expand_builtin
3183 #undef TARGET_BUILTIN_DECL
3184 #define TARGET_BUILTIN_DECL pru_builtin_decl
3186 #undef TARGET_COMPUTE_FRAME_LAYOUT
3187 #define TARGET_COMPUTE_FRAME_LAYOUT pru_compute_frame_layout
3189 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
3190 #define TARGET_FUNCTION_OK_FOR_SIBCALL hook_bool_tree_tree_true
3192 #undef TARGET_CAN_ELIMINATE
3193 #define TARGET_CAN_ELIMINATE pru_can_eliminate
3195 #undef TARGET_CLASS_LIKELY_SPILLED_P
3196 #define TARGET_CLASS_LIKELY_SPILLED_P pru_class_likely_spilled_p
3198 #undef TARGET_HARD_REGNO_MODE_OK
3199 #define TARGET_HARD_REGNO_MODE_OK pru_hard_regno_mode_ok
3201 #undef TARGET_HARD_REGNO_SCRATCH_OK
3202 #define TARGET_HARD_REGNO_SCRATCH_OK pru_hard_regno_scratch_ok
3204 #undef TARGET_FUNCTION_ARG
3205 #define TARGET_FUNCTION_ARG pru_function_arg
3207 #undef TARGET_FUNCTION_ARG_ADVANCE
3208 #define TARGET_FUNCTION_ARG_ADVANCE pru_function_arg_advance
3210 #undef TARGET_ARG_PARTIAL_BYTES
3211 #define TARGET_ARG_PARTIAL_BYTES pru_arg_partial_bytes
3213 #undef TARGET_FUNCTION_VALUE
3214 #define TARGET_FUNCTION_VALUE pru_function_value
3216 #undef TARGET_LIBCALL_VALUE
3217 #define TARGET_LIBCALL_VALUE pru_libcall_value
3219 #undef TARGET_FUNCTION_VALUE_REGNO_P
3220 #define TARGET_FUNCTION_VALUE_REGNO_P pru_function_value_regno_p
3222 #undef TARGET_RETURN_IN_MEMORY
3223 #define TARGET_RETURN_IN_MEMORY pru_return_in_memory
3225 #undef TARGET_MUST_PASS_IN_STACK
3226 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
3228 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
3229 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
3230 pru_addr_space_legitimate_address_p
3232 #undef TARGET_INIT_LIBFUNCS
3233 #define TARGET_INIT_LIBFUNCS pru_init_libfuncs
3234 #undef TARGET_LIBFUNC_GNU_PREFIX
3235 #define TARGET_LIBFUNC_GNU_PREFIX true
3237 #undef TARGET_RTX_COSTS
3238 #define TARGET_RTX_COSTS pru_rtx_costs
3240 #undef TARGET_ADDRESS_COST
3241 #define TARGET_ADDRESS_COST pru_address_cost
3243 #undef TARGET_INSN_COST
3244 #define TARGET_INSN_COST pru_insn_cost
3246 #undef TARGET_PRINT_OPERAND
3247 #define TARGET_PRINT_OPERAND pru_print_operand
3249 #undef TARGET_PRINT_OPERAND_ADDRESS
3250 #define TARGET_PRINT_OPERAND_ADDRESS pru_print_operand_address
3252 #undef TARGET_MIN_ANCHOR_OFFSET
3253 #define TARGET_MIN_ANCHOR_OFFSET 0
3255 #undef TARGET_MAX_ANCHOR_OFFSET
3256 #define TARGET_MAX_ANCHOR_OFFSET 255
3258 #undef TARGET_OPTION_OVERRIDE
3259 #define TARGET_OPTION_OVERRIDE pru_option_override
3261 #undef TARGET_SET_CURRENT_FUNCTION
3262 #define TARGET_SET_CURRENT_FUNCTION pru_set_current_function
3264 #undef TARGET_MACHINE_DEPENDENT_REORG
3265 #define TARGET_MACHINE_DEPENDENT_REORG pru_reorg
3267 #undef TARGET_CAN_USE_DOLOOP_P
3268 #define TARGET_CAN_USE_DOLOOP_P pru_can_use_doloop_p
3270 #undef TARGET_INVALID_WITHIN_DOLOOP
3271 #define TARGET_INVALID_WITHIN_DOLOOP pru_invalid_within_doloop
3273 #undef TARGET_UNWIND_WORD_MODE
3274 #define TARGET_UNWIND_WORD_MODE pru_unwind_word_mode
3276 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
3277 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
3279 struct gcc_target targetm = TARGET_INITIALIZER;
3281 #include "gt-pru.h"