/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#define INCLUDE_MEMORY
#define INCLUDE_STRING
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "target.h"
#include "backend.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "alias.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "varasm.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "explow.h"
#include "ifcvt.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "reload.h"
#include "tm_p.h"
#include "basic-block.h"
#include "expr.h"
#include "optabs.h"
#include "bitmap.h"
#include "df.h"
#include "function-abi.h"
#include "diagnostic.h"
#include "builtins.h"
#include "predict.h"
#include "tree-pass.h"
#include "opts.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "cfgrtl.h"
#include "shrink-wrap.h"
#include "sel-sched.h"
#include "sched-int.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"
#include "riscv-v.h"

/* This file should be included last.  */
#include "target-def.h"
#include "riscv-vector-costs.h"
#include "riscv-subset.h"
/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X) \
  (GET_CODE (X) == UNSPEC \
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))
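
/* For illustration (not from the original comments): such a wrapper has the
   form

     (unspec [(symbol_ref "sym")] UNSPEC_ADDRESS_FIRST + <riscv_symbol_type>)

   so UNSPEC_ADDRESS yields the wrapped symbol_ref or label_ref, and
   UNSPEC_ADDRESS_TYPE recovers the riscv_symbol_type encoded in the
   unspec number.  */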
/* Extract the backup dynamic frm rtl.  */
#define DYNAMIC_FRM_RTL(c) ((c)->machine->mode_sw_info.dynamic_frm)

/* True if the mode switching has static frm, or false.  */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)
/* True if we can use the instructions in the XTheadInt extension
   to handle interrupts, or false.  */
#define TH_INT_INTERRUPT(c)						\
  (TARGET_XTHEADINT							\
   /* The XTheadInt extension only supports rv32.  */			\
   && !TARGET_64BIT							\
   && (c)->machine->interrupt_handler_p					\
   /* The XTheadInt instructions can only be executed in M-mode.  */	\
   && (c)->machine->interrupt_mode == MACHINE_MODE)
/* Information about a function's frame layout.  */
struct GTY(())  riscv_frame_info {
  /* The size of the frame in bytes.  */
  poly_int64 total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* Likewise for vector registers.  */
  unsigned int vmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* The minimum number of bytes, in multiples of 16-byte address increments,
     required to cover the registers in a multi push & pop.  */
  unsigned multi_push_adj_base;

  /* The number of additional 16-byte address increments allocated for the
     stack frame in a multi push & pop.  */
  unsigned multi_push_adj_addi;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  poly_int64 gp_sp_offset;
  poly_int64 fp_sp_offset;

  /* Top and bottom offsets of vector save areas from frame bottom.  */
  poly_int64 v_sp_offset_top;
  poly_int64 v_sp_offset_bottom;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  poly_int64 frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  poly_int64 hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  poly_int64 arg_pointer_offset;

  /* Reset this struct, clearing all fields to zero.  */
  void reset (void);
};
enum riscv_privilege_levels {
  UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
};
struct GTY(()) mode_switching_info {
  /* The RTL variable which stores the dynamic FRM value.  We always use this
     RTX to restore dynamic FRM rounding mode in mode switching.  */
  rtx dynamic_frm;

  /* True if there is at least one static rounding mode instruction in the
     function.  */
  bool static_frm_p;

  mode_switching_info ()
  {
    dynamic_frm = NULL_RTX;
    static_frm_p = false;
  }
};
struct GTY(())  machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* True if current function is a naked function.  */
  bool naked_p;

  /* True if current function is an interrupt function.  */
  bool interrupt_handler_p;
  /* For an interrupt handler, indicates the privilege level.  */
  enum riscv_privilege_levels interrupt_mode;

  /* True if attributes on current function have been checked.  */
  bool attributes_checked_p;

  /* True if RA must be saved because of a far jump.  */
  bool far_jump_used;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;

  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];

  /* The mode switching information for the FRM rounding modes.  */
  struct mode_switching_info mode_sw_info;
};
/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;

  /* The number of vector registers allocated to this argument.  */
  unsigned int num_vrs;

  /* The offset of the first register used, provided num_vrs is nonzero.  */
  unsigned int vr_offset;

  /* The number of mask registers allocated to this argument.  */
  unsigned int num_mrs;

  /* The offset of the first register used, provided num_mrs is nonzero.  */
  unsigned int mr_offset;
};
/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  bool use_uw;
  bool save_temporary;
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8
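
/* A worked example (illustrative, not from the original comments): on rv64,
   0x12345678 is built in two operations,

	A = 0x12345000		(LUI, the upper 20 bits)
	A = A + 0x678		(ADDI, the signed 12-bit low part)

   and 0x1234567800000000 takes one more, A = A << 32 (SLLI).  Longer
   LUI/ADDI/SLLI chains of this shape give the worst case above.  */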
enum riscv_fusion_pairs
{
  RISCV_FUSE_NOTHING = 0,
  RISCV_FUSE_ZEXTW = (1 << 0),
  RISCV_FUSE_ZEXTH = (1 << 1),
  RISCV_FUSE_ZEXTWS = (1 << 2),
  RISCV_FUSE_LDINDEXED = (1 << 3),
  RISCV_FUSE_LUI_ADDI = (1 << 4),
  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
  RISCV_FUSE_LUI_LD = (1 << 6),
  RISCV_FUSE_AUIPC_LD = (1 << 7),
  RISCV_FUSE_LDPREINCREMENT = (1 << 8),
  RISCV_FUSE_ALIGNED_STD = (1 << 9),
};
/* Costs of various operations on the different architectures.  */

struct riscv_tune_param
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  unsigned short fmv_cost;
  bool slow_unaligned_access;
  bool vector_unaligned_access;
  bool use_divmod_expansion;
  bool overlap_op_by_pieces;
  unsigned int fusible_ops;
  const struct cpu_vector_cost *vec_costs;
};
/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access_p;

/* Whether misaligned vector accesses are supported (i.e. do not
   throw an exception).  */
bool riscv_vector_unaligned_access_p;

/* Whether the user explicitly passed -mstrict-align.  */
bool riscv_user_wants_strict_align;

/* Stack alignment to assume/maintain.  */
unsigned riscv_stack_boundary;

/* Whether we are in riscv_output_mi_thunk.  */
static bool riscv_in_thunk_func = false;

/* If non-zero, this is an offset to be added to SP to redefine the CFA
   when restoring the FP register from the stack.  Only valid when generating
   the epilogue.  */
static poly_int64 epilogue_cfa_sp_offset;

/* Which tuning parameters to use.  */
static const struct riscv_tune_param *tune_param;

/* Which automaton to use for tuning.  */
enum riscv_microarchitecture_type riscv_microarchitecture;

/* The number of chunks in a single vector register.  */
poly_uint16 riscv_vector_chunks;

/* The number of bytes in a vector chunk.  */
unsigned riscv_bytes_per_vector_chunk;
/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS, GR_REGS, GR_REGS, GR_REGS,
  GR_REGS, GR_REGS, SIBCALL_REGS, SIBCALL_REGS,
  JALR_REGS, JALR_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  SIBCALL_REGS, SIBCALL_REGS, JALR_REGS, JALR_REGS,
  JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS,
  JALR_REGS, JALR_REGS, JALR_REGS, JALR_REGS,
  SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FP_REGS, FP_REGS, FP_REGS, FP_REGS,
  FRAME_REGS, FRAME_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  NO_REGS, NO_REGS, NO_REGS, NO_REGS,
  VM_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
  VD_REGS, VD_REGS, VD_REGS, VD_REGS,
};
/* RVV costs for VLS vector operations.  */
static const common_vector_cost rvv_vls_vector_cost = {
  1, /* int_stmt_cost */
  1, /* fp_stmt_cost */
  1, /* gather_load_cost */
  1, /* scatter_store_cost */
  1, /* segment_permute (2) */
  1, /* segment_permute (3) */
  1, /* segment_permute (4) */
  1, /* segment_permute (5) */
  1, /* segment_permute (6) */
  1, /* segment_permute (7) */
  1, /* segment_permute (8) */
  1, /* vec_to_scalar_cost */
  1, /* scalar_to_vec_cost */
  1, /* permute_cost */
  1, /* align_load_cost */
  1, /* align_store_cost */
  2, /* unalign_load_cost */
  2, /* unalign_store_cost */
};
/* RVV costs for VLA vector operations.  */
static const scalable_vector_cost rvv_vla_vector_cost = {
  {
    1, /* int_stmt_cost */
    1, /* fp_stmt_cost */
    1, /* gather_load_cost */
    1, /* scatter_store_cost */
    1, /* segment_permute (2) */
    1, /* segment_permute (3) */
    1, /* segment_permute (4) */
    1, /* segment_permute (5) */
    1, /* segment_permute (6) */
    1, /* segment_permute (7) */
    1, /* segment_permute (8) */
    1, /* vec_to_scalar_cost */
    1, /* scalar_to_vec_cost */
    1, /* permute_cost */
    1, /* align_load_cost */
    1, /* align_store_cost */
    2, /* unalign_load_cost */
    2, /* unalign_store_cost */
  },
};
/* RVV register move cost.  */
static const regmove_vector_cost rvv_regmove_vector_cost = {
  2, /* GR2VR */
  2, /* FR2VR */
  2, /* VR2GR */
  2, /* VR2FR */
};
/* Generic costs for vector insn classes.  This is the vector cost model
   used by default when no other cost model is specified.  */
static const struct cpu_vector_cost generic_vector_cost = {
  1, /* scalar_int_stmt_cost */
  1, /* scalar_fp_stmt_cost */
  1, /* scalar_load_cost */
  1, /* scalar_store_cost */
  3, /* cond_taken_branch_cost */
  1, /* cond_not_taken_branch_cost */
  &rvv_vls_vector_cost, /* vls */
  &rvv_vla_vector_cost, /* vla */
  &rvv_regmove_vector_cost, /* regmove */
};
/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_param rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  5, /* memory_cost */
  8, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for Sifive 7 Series.  */
static const struct riscv_tune_param sifive_7_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
  2, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  8, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for Sifive p400 Series.  */
static const struct riscv_tune_param sifive_p400_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  3, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  4, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for Sifive p600 Series.  */
static const struct riscv_tune_param sifive_p600_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  4, /* issue_rate */
  4, /* branch_cost */
  3, /* memory_cost */
  4, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for T-HEAD c906.  */
static const struct riscv_tune_param thead_c906_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (34)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  5, /* memory_cost */
  8, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for xiangshan nanhu.  */
static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)}, /* fp_add */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)}, /* fp_mul */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (20)}, /* fp_div */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  6, /* issue_rate */
  3, /* branch_cost */
  3, /* memory_cost */
  3, /* fmv_cost */
  true, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
  NULL, /* vector cost */
};

/* Costs to use when optimizing for a generic ooo profile.  */
static const struct riscv_tune_param generic_ooo_tune_info = {
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* fp_add */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (6)}, /* fp_mul */
  {COSTS_N_INSNS (7), COSTS_N_INSNS (8)}, /* fp_div */
  {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */
  1, /* issue_rate */
  3, /* branch_cost */
  4, /* memory_cost */
  4, /* fmv_cost */
  false, /* slow_unaligned_access */
  true, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  true, /* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  &generic_vector_cost, /* vector cost */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_param optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_div */
  1, /* issue_rate */
  1, /* branch_cost */
  2, /* memory_cost */
  8, /* fmv_cost */
  false, /* slow_unaligned_access */
  false, /* vector_unaligned_access */
  false, /* use_divmod_expansion */
  false, /* overlap_op_by_pieces */
  RISCV_FUSE_NOTHING, /* fusible_ops */
  NULL, /* vector cost */
};
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_rvv_vector_bits_attribute (tree *, tree, tree, int,
						    bool *);
/* Defining target-specific uses of __attribute__.  */
static const attribute_spec riscv_gnu_attributes[] =
{
  /* Syntax: { name, min_len, max_len, decl_required, type_required,
	       function_type_required, affects_type_identity, handler,
	       exclude } */

  /* The attribute telling no prologue/epilogue.  */
  {"naked", 0, 0, true, false, false, false, riscv_handle_fndecl_attribute,
   NULL},
  /* This attribute generates prologue/epilogue for interrupt handlers.  */
  {"interrupt", 0, 1, false, true, true, false, riscv_handle_type_attribute,
   NULL},

  /* The following two are used for the built-in properties of the Vector type
     and are not used externally.  */
  {"RVV sizeless type", 4, 4, false, true, false, true, NULL, NULL},
  {"RVV type", 0, 0, false, true, false, true, NULL, NULL},
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     __attribute__((riscv_vector_cc)).  */
  {"riscv_vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     number of bits in the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"riscv_rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};
static const scoped_attribute_specs riscv_gnu_attribute_table =
{
  "gnu", {riscv_gnu_attributes}
};
static const attribute_spec riscv_attributes[] =
{
  /* This attribute is used to declare a function, forcing it to use the
     standard vector calling convention variant.  Syntax:
     [[riscv::vector_cc]].  */
  {"vector_cc", 0, 0, false, true, true, true, NULL, NULL},
  /* This attribute is used to declare a new type, specifying the exact
     number of bits in the type.  For example:

     typedef vint8m1_t f_vint8m1_t __attribute__((riscv_rvv_vector_bits(256)));

     The newly created type f_vint8m1_t will be exactly 256 bits.  It can
     be used in globals, structs, unions, and arrays instead of sizeless
     types.  */
  {"rvv_vector_bits", 1, 1, false, true, false, true,
   riscv_handle_rvv_vector_bits_attribute, NULL},
};
static const scoped_attribute_specs riscv_nongnu_attribute_table =
{
  "riscv", {riscv_attributes}
};
static const scoped_attribute_specs *const riscv_attribute_table[] =
{
  &riscv_gnu_attribute_table,
  &riscv_nongnu_attribute_table
};
/* Order for the CLOBBERs/USEs of gpr_save.  */
static const unsigned gpr_save_reg_order[] = {
  INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
  S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
  S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
  S10_REGNUM, S11_REGNUM
};
/* A table describing all the processors GCC knows about.  */
static const struct riscv_tune_info riscv_tune_info_table[] = {
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)	\
  { TUNE_NAME, PIPELINE_MODEL, & TUNE_INFO},
#include "riscv-cores.def"
};
/* Global variable to distinguish whether we should save and restore s0/fp for
   the current function.  */
static bool riscv_save_frame_pointer;
typedef enum
{
  PUSH_IDX = 0,
  POP_IDX,
  POPRET_IDX,
  POPRETZ_IDX,
  ZCMP_OP_NUM
} riscv_zcmp_op_t;

typedef insn_code (*code_for_push_pop_t) (machine_mode);
void riscv_frame_info::reset (void)
{
  total_size = 0;
  mask = 0;
  fmask = 0;
  vmask = 0;
  save_libcall_adjustment = 0;

  gp_sp_offset = 0;
  fp_sp_offset = 0;
  v_sp_offset_top = 0;
  v_sp_offset_bottom = 0;

  frame_pointer_offset = 0;

  hard_frame_pointer_offset = 0;

  arg_pointer_offset = 0;
}
/* Implement TARGET_MIN_ARITHMETIC_PRECISION.  */

static unsigned int
riscv_min_arithmetic_precision (void)
{
  return 32;
}
/* Get the arch string from an options object.  */

template <class T>
static const char *
get_arch_str (const T *opts)
{
  return opts->x_riscv_arch_string;
}

/* Get the tune string from an options object, falling back to the cpu
   string and then the default.  */

template <class T>
static const char *
get_tune_str (const T *opts)
{
  const char *tune_string = RISCV_TUNE_STRING_DEFAULT;
  if (opts->x_riscv_tune_string)
    tune_string = opts->x_riscv_tune_string;
  else if (opts->x_riscv_cpu_string)
    tune_string = opts->x_riscv_cpu_string;
  return tune_string;
}
/* Return the riscv_tune_info entry for the given name string.  If the name
   is not found, return nullptr when NULL_P is true; otherwise report an
   error and return a placeholder entry.  */

const struct riscv_tune_info *
riscv_parse_tune (const char *tune_string, bool null_p)
{
  const riscv_cpu_info *cpu = riscv_find_cpu (tune_string);

  if (cpu)
    tune_string = cpu->tune;

  for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++)
    if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0)
      return riscv_tune_info_table + i;

  if (null_p)
    return nullptr;

  error ("unknown cpu %qs for %<-mtune%>", tune_string);
  return riscv_tune_info_table;
}
/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
  int upper_trailing_ones = ctz_hwi (~value >> 32);
  int lower_leading_ones = clz_hwi (~value << 32);

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      codes[0].use_uw = false;
      codes[0].save_temporary = false;
      return 1;
    }
  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
    {
      /* Simply BSETI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      codes[0].use_uw = false;
      codes[0].save_temporary = false;

      /* RISC-V sign-extends all 32bit values that live in a 32bit
	 register.  To avoid paradoxes, we thus need to use the
	 sign-extended (negative) representation (-1 << 31) for the
	 value, if we want to build (1 << 31) in SImode.  This will
	 then expand to an LUI instruction.  */
      if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
	codes[0].value = (HOST_WIDE_INT_M1U << 31);

      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      HOST_WIDE_INT upper_part = value - low_part;
      if (mode != VOIDmode)
	upper_part = trunc_int_for_mode (value - low_part, mode);

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      bool use_uw = false;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS
	  && !SMALL_OPERAND (x)
	  && (LUI_OPERAND (x << IMM_BITS)
	      || (TARGET_64BIT
		  && TARGET_ZBA
		  && LUI_OPERAND ((x << IMM_BITS)
				  & ~HOST_WIDE_INT_C (0x80000000)))))
	shift -= IMM_BITS, x <<= IMM_BITS;

      /* If X has bits 32..63 clear and bit 31 set, then go ahead and mark
	 it as desiring a "uw" operation for the shift.  That way we can have
	 LUI+ADDI to generate the constant, then shift it into position
	 clearing out the undesirable bits.  */
      if (!LUI_OPERAND (x)
	  && TARGET_64BIT
	  && TARGET_ZBA
	  && clz_hwi (x) == 32)
	{
	  x = sext_hwi (x, 32);
	  use_uw = true;
	}

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = use_uw;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (cost > 2 && TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB))
    {
      int leading_ones = clz_hwi (~value);
      int trailing_ones = ctz_hwi (~value);

      /* If all bits are one except a few that are zero, and the zero bits
	 are within a range of 11 bits, then we can synthesize a constant
	 by loading a small negative constant and rotating.  */
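      /* For example (illustrative): 0xFFFFFFFFFFFFF80F has zeros only in
	 bits 4..10.  Rotating it right by 4 packs the ones together, giving
	 -128 (0xFFFFFFFFFFFFFF80), which a single ADDI can load; rotating
	 that right by 60 (rori) puts the zero window back at bits 4..10.  */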
      if (leading_ones < 64
	  && ((64 - leading_ones - trailing_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
			    | (value << (64 - trailing_ones)));
	  codes[0].use_uw = false;
	  codes[0].save_temporary = false;
	  codes[1].code = ROTATERT;
	  codes[1].value = 64 - trailing_ones;
	  codes[1].use_uw = false;
	  codes[1].save_temporary = false;
	  cost = 2;
	}
      /* Handle the case where the 11 bit range of zero bits wraps around.  */
      else if (upper_trailing_ones < 32 && lower_leading_ones < 32
	       && ((64 - upper_trailing_ones - lower_leading_ones) < 12))
	{
	  codes[0].code = UNKNOWN;
	  /* The sign-bit might be zero, so just rotate to be safe.  */
	  codes[0].value = ((value << (32 - upper_trailing_ones))
			    | ((unsigned HOST_WIDE_INT) value
			       >> (32 + upper_trailing_ones)));
	  codes[0].use_uw = false;
	  codes[0].save_temporary = false;
	  codes[1].code = ROTATERT;
	  codes[1].value = 32 - upper_trailing_ones;
	  codes[1].use_uw = false;
	  codes[1].save_temporary = false;
	  cost = 2;
	}

      /* If LUI/ADDI are going to set bits 32..63 and we need a small
	 number of them cleared, we might be able to use bclri profitably.

	 Note we may allow clearing of bit 31 using bclri.  There's a class
	 of constants with that bit clear where this helps.  */
      else if (TARGET_64BIT
	       && TARGET_ZBS
	       && (32 - popcount_hwi (value & HOST_WIDE_INT_C (0xffffffff80000000))) + 1 < cost)
	{
	  /* Turn on all those upper bits and synthesize the result.  */
	  HOST_WIDE_INT nval = value | HOST_WIDE_INT_C (0xffffffff80000000);
	  alt_cost = riscv_build_integer_1 (alt_codes, nval, mode);

	  /* Now iterate over the bits we want to clear until the cost is
	     too high or we're done.  */
	  nval = value ^ HOST_WIDE_INT_C (-1);
	  nval &= HOST_WIDE_INT_C (~0x7fffffff);
	  while (nval && alt_cost < cost)
	    {
	      HOST_WIDE_INT bit = ctz_hwi (nval);
	      alt_codes[alt_cost].code = AND;
	      alt_codes[alt_cost].value = ~(1UL << bit);
	      alt_codes[alt_cost].use_uw = false;
	      alt_codes[alt_cost].save_temporary = false;
	      alt_cost++;
	      nval &= ~(1UL << bit);
	    }

	  if (nval == 0 && alt_cost <= cost)
	    {
	      memcpy (codes, alt_codes, sizeof (alt_codes));
	      cost = alt_cost;
	    }
	}
    }

  if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
    {
      if ((value % 9) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 9, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 9;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
      if ((value % 5) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 5, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 5;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
      if ((value % 3) == 0
	  && (alt_cost
	      = riscv_build_integer_1 (alt_codes, value / 3, mode) + 1) < cost)
	{
	  alt_codes[alt_cost - 1].code = FMA;
	  alt_codes[alt_cost - 1].value = 3;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* We might be able to generate a constant close to our target
     then a final ADDI to get the desired constant.  */
  if (cost > 2
      && (value & 0xfff) != 0
      && (value & 0x1800) == 0x1000)
    {
      HOST_WIDE_INT adjustment = -(0x800 - (value & 0xfff));
      alt_cost = 1 + riscv_build_integer_1 (alt_codes,
					    value - adjustment, mode);

      if (alt_cost < cost)
	{
	  alt_codes[alt_cost - 1].code = PLUS;
	  alt_codes[alt_cost - 1].value = adjustment;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Final cases, particularly focused on bseti.  */
  if (cost > 2 && TARGET_ZBS)
    {
      int i = 0;

      /* First handle any bits set by LUI.  Be careful of the
	 SImode sign bit!  */
      if (value & 0x7ffff000)
	{
	  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
	  alt_codes[i].value = value & 0x7ffff000;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~0x7ffff000;
	  i++;
	}

      /* Next, any bits we can handle with addi.  */
      if (value & 0x7ff)
	{
	  alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
	  alt_codes[i].value = value & 0x7ff;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~0x7ff;
	  i++;
	}

      /* And any residuals with bseti.  */
      while (i < cost && value)
	{
	  HOST_WIDE_INT bit = ctz_hwi (value);
	  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
	  alt_codes[i].value = 1UL << bit;
	  alt_codes[i].use_uw = false;
	  alt_codes[i].save_temporary = false;
	  value &= ~(1ULL << bit);
	  i++;
	}

      /* If LUI+ADDI+BSETI resulted in a more efficient
	 sequence, then use it.  */
      if (value == 0 && i < cost)
	{
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = i;
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}
/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.

   ALLOW_NEW_PSEUDOS indicates whether the caller wants to allow new pseudo
   registers or not.  This is needed for cases where the integer synthesis
   and costing code are used in insn conditions: we can't have costing allow
   recognition at some points and reject at others.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     machine_mode mode, bool allow_new_pseudos)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  alt_codes[alt_cost-1].use_uw = false;
	  alt_codes[alt_cost-1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* See if we can generate the inverted constant, then use
     not to get the desired constant.

     This can't be in riscv_build_integer_1 as it'll mutually
     recurse with another case in there.  And it has to recurse
     into riscv_build_integer so we get the trailing 0s case
     above.  */
  if (cost > 2 && value < 0)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost;

      HOST_WIDE_INT nval = ~value;
      alt_cost = 1 + riscv_build_integer (alt_codes, nval,
					  mode, allow_new_pseudos);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost - 1].code = XOR;
	  alt_codes[alt_cost - 1].value = -1;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  if (!TARGET_64BIT
      && (value > INT32_MAX || value < INT32_MIN))
    {
      unsigned HOST_WIDE_INT loval = sext_hwi (value, 32);
      unsigned HOST_WIDE_INT hival = sext_hwi ((value - loval) >> 32, 32);
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      struct riscv_integer_op hicode[RISCV_MAX_INTEGER_OPS];
      int hi_cost, lo_cost;

      hi_cost = riscv_build_integer_1 (hicode, hival, mode);
      if (hi_cost < cost)
	{
	  lo_cost = riscv_build_integer_1 (alt_codes, loval, mode);
	  if (lo_cost + hi_cost < cost)
	    {
	      memcpy (codes, alt_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      memcpy (codes + lo_cost, hicode,
		      hi_cost * sizeof (struct riscv_integer_op));
	      cost = lo_cost + hi_cost;
	    }
	}
    }

  /* With pack we can generate a 64 bit constant with the same high
     and low 32 bits trivially.  */
  if (cost > 3 && TARGET_64BIT && TARGET_ZBKB)
    {
      unsigned HOST_WIDE_INT loval = value & 0xffffffff;
      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
      if (hival == loval)
	{
	  cost = 1 + riscv_build_integer_1 (codes, sext_hwi (loval, 32), mode);
	  codes[cost - 1].code = CONCAT;
	  codes[cost - 1].value = 0;
	  codes[cost - 1].use_uw = false;
	  codes[cost - 1].save_temporary = false;
	}

      /* An arbitrary 64 bit constant can be synthesized in 5 instructions
	 using zbkb.  We may do better than that if the upper or lower half
	 can be synthesized with a single LUI, ADDI or BSET.  Regardless the
	 basic steps are the same.  */
      if (cost > 3 && can_create_pseudo_p () && allow_new_pseudos)
	{
	  struct riscv_integer_op hi_codes[RISCV_MAX_INTEGER_OPS];
	  struct riscv_integer_op lo_codes[RISCV_MAX_INTEGER_OPS];
	  int hi_cost, lo_cost;

	  /* Synthesize and get cost for each half.  */
	  lo_cost
	    = riscv_build_integer_1 (lo_codes, sext_hwi (loval, 32), mode);
	  hi_cost
	    = riscv_build_integer_1 (hi_codes, sext_hwi (hival, 32), mode);

	  /* If profitable, finish synthesis using zbkb.  */
	  if (cost > hi_cost + lo_cost + 1)
	    {
	      /* We need the low half independent of the high half.  So
		 mark it as creating a temporary we'll use later.  */
	      memcpy (codes, lo_codes,
		      lo_cost * sizeof (struct riscv_integer_op));
	      codes[lo_cost - 1].save_temporary = true;

	      /* Now the high half synthesis.  */
	      memcpy (codes + lo_cost, hi_codes,
		      hi_cost * sizeof (struct riscv_integer_op));

	      /* Adjust the cost.  */
	      cost = hi_cost + lo_cost + 1;

	      /* And finally (ab)use VEC_MERGE to indicate we want to
		 merge the two parts together.  */
	      codes[cost - 1].code = VEC_MERGE;
	      codes[cost - 1].value = 0;
	      codes[cost - 1].use_uw = false;
	      codes[cost - 1].save_temporary = false;
	    }
	}
    }
  else if (cost > 4 && TARGET_64BIT && can_create_pseudo_p ()
	   && allow_new_pseudos)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost;

      unsigned HOST_WIDE_INT loval = value & 0xffffffff;
      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
      bool bit31 = (loval & 0x80000000) != 0;
      int trailing_shift = ctz_hwi (loval) - ctz_hwi (hival);
      int leading_shift = clz_hwi (loval) - clz_hwi (hival);
      int shiftval = 0;

      /* Adjust the shift into the high half accordingly.  */
      if ((trailing_shift > 0 && hival == (loval >> trailing_shift)))
	shiftval = 32 - trailing_shift;
      else if ((leading_shift > 0 && hival == (loval << leading_shift)))
	shiftval = 32 + leading_shift;

      if (shiftval && !bit31)
	alt_cost = 2 + riscv_build_integer_1 (alt_codes, sext_hwi (loval, 32),
					      mode);

      /* For constants where the upper half is a shift of the lower half we
	 can do a shift followed by an or.  */
      if (shiftval && !bit31 && alt_cost < cost)
	{
	  /* We need to save the first constant we build.  */
	  alt_codes[alt_cost - 3].save_temporary = true;

	  /* Now we want to shift the previously generated constant into the
	     high half.  */
	  alt_codes[alt_cost - 2].code = ASHIFT;
	  alt_codes[alt_cost - 2].value = shiftval;
	  alt_codes[alt_cost - 2].use_uw = false;
	  alt_codes[alt_cost - 2].save_temporary = false;

	  /* And the final step, IOR the two halves together.  Since this
	     uses the saved temporary, use CONCAT similar to what we do for
	     Zbkb.  */
	  alt_codes[alt_cost - 1].code = CONCAT;
	  alt_codes[alt_cost - 1].value = 0;
	  alt_codes[alt_cost - 1].use_uw = false;
	  alt_codes[alt_cost - 1].save_temporary = false;

	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      if (cost > 4 && !bit31 && TARGET_ZBA)
	{
	  int value = 0;

	  /* Check for a shNadd.  */
	  if (hival == loval * 3)
	    value = 3;
	  else if (hival == loval * 5)
	    value = 5;
	  else if (hival == loval * 9)
	    value = 9;

	  if (value)
	    alt_cost = 2 + riscv_build_integer_1 (alt_codes,
						  sext_hwi (loval, 32), mode);

	  /* For constants where the upper half is a shNadd of the lower half
	     we can do a similar transformation.  */
	  if (value && alt_cost < cost)
	    {
	      alt_codes[alt_cost - 3].save_temporary = true;
	      alt_codes[alt_cost - 2].code = FMA;
	      alt_codes[alt_cost - 2].value = value;
	      alt_codes[alt_cost - 2].use_uw = false;
	      alt_codes[alt_cost - 2].save_temporary = false;
	      alt_codes[alt_cost - 1].code = CONCAT;
	      alt_codes[alt_cost - 1].value = 0;
	      alt_codes[alt_cost - 1].use_uw = false;
	      alt_codes[alt_cost - 1].save_temporary = false;

	      memcpy (codes, alt_codes, sizeof (alt_codes));
	      cost = alt_cost;
	    }
	}

      if (cost > 4 && !bit31)
	{
	  int value = hival - loval;

	  /* For constants where the halves differ by less than 2048 we can
	     generate the upper half by using an addi on the lower half then
	     using a shift 32 followed by an or.  */
	  if (IN_RANGE (value, -2048, 2047))
	    {
	      alt_cost = 3 + riscv_build_integer_1 (alt_codes,
						    sext_hwi (loval, 32), mode);
	      if (alt_cost < cost)
		{
		  alt_codes[alt_cost - 4].save_temporary = true;
		  alt_codes[alt_cost - 3].code = PLUS;
		  alt_codes[alt_cost - 3].value = value;
		  alt_codes[alt_cost - 3].use_uw = false;
		  alt_codes[alt_cost - 3].save_temporary = false;
		  alt_codes[alt_cost - 2].code = ASHIFT;
		  alt_codes[alt_cost - 2].value = 32;
		  alt_codes[alt_cost - 2].use_uw = false;
		  alt_codes[alt_cost - 2].save_temporary = false;
		  alt_codes[alt_cost - 1].code = CONCAT;
		  alt_codes[alt_cost - 1].value = 0;
		  alt_codes[alt_cost - 1].use_uw = false;
		  alt_codes[alt_cost - 1].save_temporary = false;

		  memcpy (codes, alt_codes, sizeof (alt_codes));
		  cost = alt_cost;
		}
	    }
	}

      if (cost > 5 && !bit31)
	{
	  /* For constants where the upper half is the lower half inverted we
	     can flip it with an xor and do a shift 32 followed by an or.  */
	  if (hival == (~loval & 0xffffffff))
	    {
	      alt_cost = 3 + riscv_build_integer_1 (alt_codes,
						    sext_hwi (loval, 32), mode);
	      if (alt_cost < cost)
		{
		  alt_codes[alt_cost - 4].save_temporary = true;
		  alt_codes[alt_cost - 3].code = XOR;
		  alt_codes[alt_cost - 3].value = -1;
		  alt_codes[alt_cost - 3].use_uw = false;
		  alt_codes[alt_cost - 3].save_temporary = false;
		  alt_codes[alt_cost - 2].code = ASHIFT;
		  alt_codes[alt_cost - 2].value = 32;
		  alt_codes[alt_cost - 2].use_uw = false;
		  alt_codes[alt_cost - 2].save_temporary = false;
		  alt_codes[alt_cost - 1].code = CONCAT;
		  alt_codes[alt_cost - 1].value = 0;
		  alt_codes[alt_cost - 1].use_uw = false;
		  alt_codes[alt_cost - 1].save_temporary = false;

		  memcpy (codes, alt_codes, sizeof (alt_codes));
		  cost = alt_cost;
		}
	    }
	}
    }

  return cost;
}
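
/* For illustration (not from the original comments): with Zbkb, a constant
   such as 0x1234567812345678, whose 32-bit halves are equal, costs three
   operations: LUI+ADDI to build 0x12345678, then a single pack to duplicate
   the low half into the high half.  */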
/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = val & 0xffffffff;
  unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  /* This routine isn't used by pattern conditions, so whether or
     not to allow new pseudos can be a function of where we are in the
     RTL pipeline.  */
  bool allow_new_pseudos = can_create_pseudo_p ();
  cost = 2 + riscv_build_integer (codes, loval, VOIDmode, allow_new_pseudos);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode, allow_new_pseudos);
  else if ((loval & 0x80000000) != 0)
    cost = 3 + riscv_build_integer (codes, ~loval & 0xffffffff,
				    VOIDmode, allow_new_pseudos);

  return cost;
}
/* Return the cost of constructing the integer constant VAL.  ALLOW_NEW_PSEUDOS
   potentially restricts if riscv_build_integer is allowed to create new
   pseudo registers.  It must be false for calls directly or indirectly from
   conditions in patterns.  */

static int
riscv_integer_cost (HOST_WIDE_INT val, bool allow_new_pseudos)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode, allow_new_pseudos),
	      riscv_split_integer_cost (val));
}
/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = val & 0xffffffff;
  unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);
  rtx x = gen_reg_rtx (mode);
  bool eq_neg = (loval == hival) && ((loval & 0x80000000) != 0);

  if (eq_neg)
    riscv_move_integer (lo, lo, ~loval & 0xffffffff, mode);
  else
    riscv_move_integer (lo, lo, loval, mode);

  if (loval == hival)
    hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
  else
    {
      riscv_move_integer (hi, hi, hival, mode);
      hi = gen_rtx_ASHIFT (mode, hi, GEN_INT (32));
    }

  hi = force_reg (mode, hi);
  x = gen_rtx_PLUS (mode, hi, lo);
  if (eq_neg)
    {
      x = force_reg (mode, x);
      x = gen_rtx_XOR (mode, x, GEN_INT (-1));
    }

  return x;
}
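
/* For illustration (not from the original comments): splitting
   0x1234567887654321 builds 0x87654321 in LO and 0x12345678 in HI, shifts
   HI left by 32 and adds the two parts.  In the eq_neg case the inverted
   low half is built once, reused for both halves, and the final XOR with -1
   restores the intended value.  */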
/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  switch (riscv_cmodel)
    {
    case CM_MEDLOW:
      return SYMBOL_ABSOLUTE;
    case CM_LARGE:
      if (SYMBOL_REF_P (x))
	return CONSTANT_POOL_ADDRESS_P (x) ? SYMBOL_PCREL : SYMBOL_FORCE_TO_MEM;
      return SYMBOL_PCREL;
    default:
      return SYMBOL_PCREL;
    }
}
/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}
/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}
1554 static int riscv_symbol_insns (enum riscv_symbol_type type)
1556 switch (type)
1558 case SYMBOL_TLS: return 0; /* Depends on the TLS model. */
1559 case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference. */
1560 case SYMBOL_PCREL: return 2; /* AUIPC + the reference. */
1561 case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference. */
1562 case SYMBOL_TLSDESC: return 6; /* 4-instruction call + ADD TP + the reference. */
1563 case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference. */
1564 case SYMBOL_FORCE_TO_MEM: return 3; /* AUIPC + LD + the reference. */
1565 default: gcc_unreachable ();
/* Immediate values loaded by the FLI.S instruction in Chapter 25 of the
   latest RISC-V ISA Manual draft.  For details, please see:
   https://github.com/riscv/riscv-isa-manual/releases/tag/isa-449cd0c  */

static unsigned HOST_WIDE_INT fli_value_hf[32] =
{
  0xbcp8, 0x4p8, 0x1p8, 0x2p8, 0x1cp8, 0x20p8, 0x2cp8, 0x30p8,
  0x34p8, 0x35p8, 0x36p8, 0x37p8, 0x38p8, 0x39p8, 0x3ap8, 0x3bp8,
  0x3cp8, 0x3dp8, 0x3ep8, 0x3fp8, 0x40p8, 0x41p8, 0x42p8, 0x44p8,
  0x48p8, 0x4cp8, 0x58p8, 0x5cp8, 0x78p8,
  /* Only used for filling, ensuring that 29 and 30 of HF are the same.  */
  0x78p8,
  0x7cp8, 0x7ep8
};

static unsigned HOST_WIDE_INT fli_value_sf[32] =
{
  0xbf8p20, 0x008p20, 0x378p20, 0x380p20, 0x3b8p20, 0x3c0p20, 0x3d8p20, 0x3e0p20,
  0x3e8p20, 0x3eap20, 0x3ecp20, 0x3eep20, 0x3f0p20, 0x3f2p20, 0x3f4p20, 0x3f6p20,
  0x3f8p20, 0x3fap20, 0x3fcp20, 0x3fep20, 0x400p20, 0x402p20, 0x404p20, 0x408p20,
  0x410p20, 0x418p20, 0x430p20, 0x438p20, 0x470p20, 0x478p20, 0x7f8p20, 0x7fcp20
};

static unsigned HOST_WIDE_INT fli_value_df[32] =
{
  0xbff0p48, 0x10p48, 0x3ef0p48, 0x3f00p48,
  0x3f70p48, 0x3f80p48, 0x3fb0p48, 0x3fc0p48,
  0x3fd0p48, 0x3fd4p48, 0x3fd8p48, 0x3fdcp48,
  0x3fe0p48, 0x3fe4p48, 0x3fe8p48, 0x3fecp48,
  0x3ff0p48, 0x3ff4p48, 0x3ff8p48, 0x3ffcp48,
  0x4000p48, 0x4004p48, 0x4008p48, 0x4010p48,
  0x4020p48, 0x4030p48, 0x4060p48, 0x4070p48,
  0x40e0p48, 0x40f0p48, 0x7ff0p48, 0x7ff8p48
};

/* Display floating-point values at the assembly level, which is consistent
   with the zfa extension of llvm:
   https://reviews.llvm.org/D145645.  */

const char *fli_value_print[32] =
{
  "-1.0", "min", "1.52587890625e-05", "3.0517578125e-05", "0.00390625", "0.0078125", "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375", "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75", "2.0", "2.5", "3.0", "4.0",
  "8.0", "16.0", "128.0", "256.0", "32768.0", "65536.0", "inf", "nan"
};
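
/* For example (illustrative): fli_value_sf[12] is 0x3f0p20, i.e. 0x3f000000,
   the IEEE single-precision bit pattern for 0.5, and fli_value_print[12] is
   "0.5", so table index 12 is emitted as "fli.s <reg>, 0.5".  */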
/* Return the index into the FLI instruction table if rtx X is an immediate
   constant that can be moved using a single FLI instruction in the zfa
   extension.  Return -1 if not found.  */

int
riscv_float_const_rtx_index_for_fli (rtx x)
{
  unsigned HOST_WIDE_INT *fli_value_array;

  machine_mode mode = GET_MODE (x);

  if (!TARGET_ZFA
      || !CONST_DOUBLE_P (x)
      || mode == VOIDmode
      || (mode == HFmode && !(TARGET_ZFH || TARGET_ZVFH))
      || (mode == SFmode && !TARGET_HARD_FLOAT)
      || (mode == DFmode && !TARGET_DOUBLE_FLOAT))
    return -1;

  if (!SCALAR_FLOAT_MODE_P (mode)
      || GET_MODE_BITSIZE (mode).to_constant () > HOST_BITS_PER_WIDE_INT
      /* Only support up to DF mode.  */
      || GET_MODE_BITSIZE (mode).to_constant () > GET_MODE_BITSIZE (DFmode))
    return -1;

  unsigned HOST_WIDE_INT ival = 0;

  long res[2];
  real_to_target (res,
		  CONST_DOUBLE_REAL_VALUE (x),
		  REAL_MODE_FORMAT (mode));

  if (mode == DFmode)
    {
      int order = BYTES_BIG_ENDIAN ? 1 : 0;
      ival = zext_hwi (res[order], 32);
      ival |= (zext_hwi (res[1 - order], 32) << 32);

      /* When the lower 32 bits are not all 0, it is impossible to be in the
	 table.  */
      if (ival & (unsigned HOST_WIDE_INT) 0xffffffff)
	return -1;
    }
  else
    ival = zext_hwi (res[0], 32);

  switch (mode)
    {
    case E_HFmode:
      fli_value_array = fli_value_hf;
      break;
    case E_SFmode:
      fli_value_array = fli_value_sf;
      break;
    case E_DFmode:
      fli_value_array = fli_value_df;
      break;
    default:
      return -1;
    }

  if (fli_value_array[0] == ival)
    return 0;

  if (fli_value_array[1] == ival)
    return 1;

  /* Perform a binary search to find the target index.  */
  unsigned l, r, m;

  l = 2;
  r = 31;

  while (l <= r)
    {
      m = (l + r) / 2;
      if (fli_value_array[m] == ival)
	return m;
      else if (fli_value_array[m] < ival)
	l = m + 1;
      else
	r = m - 1;
    }

  return -1;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* With the post-reload usage, it seems best to just pass in FALSE
     rather than pass ALLOW_NEW_PSEUDOS through the call chain.  */
  return riscv_const_insns (x, false) > 0;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.
   Return true if X cannot (or should not) be spilled to the
   constant pool.  */

static bool
riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There's no way to calculate VL-based values using relocations.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    if (GET_CODE (*iter) == CONST_POLY_INT)
      return true;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  if (satisfies_constraint_zfli (x))
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      if (type == SYMBOL_FORCE_TO_MEM)
	return false;

      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}
1782 /* Get valid index register class.
1783 The RISC-V base instructions don't support index registers,
1784 but extensions might support that. */
1786 enum reg_class
1787 riscv_index_reg_class ()
1789 if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
1790 return GR_REGS;
1792 return NO_REGS;
1795 /* Return true if register REGNO is a valid index register.
1796 The RISC-V base instructions don't support index registers,
1797 but extensions might support that. */
1800 riscv_regno_ok_for_index_p (int regno)
1802 if (TARGET_XTHEADMEMIDX || TARGET_XTHEADFMEMIDX)
1803 return riscv_regno_mode_ok_for_base_p (regno, VOIDmode, 1);
1805 return 0;
1808 /* Return true if X is a valid base register for mode MODE.
1809 STRICT_P is true if REG_OK_STRICT is in effect. */
1811 bool
1812 riscv_valid_base_register_p (rtx x, machine_mode mode, bool strict_p)
1814 if (!strict_p && GET_CODE (x) == SUBREG)
1815 x = SUBREG_REG (x);
1817 return (REG_P (x)
1818 && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
1821 /* Return true if, for every base register BASE_REG, (plus BASE_REG X)
1822 can address a value of mode MODE. */
1824 static bool
1825 riscv_valid_offset_p (rtx x, machine_mode mode)
1827 /* Check that X is a signed 12-bit number. */
1828 if (!const_arith_operand (x, Pmode))
1829 return false;
1831 /* We may need to split multiword moves, so make sure that every word
1832 is accessible. */
1833 if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
1834 && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode).to_constant () - UNITS_PER_WORD))
1835 return false;
1837 return true;
1840 /* Should a symbol of type SYMBOL_TYPE be split in two? */
1842 bool
1843 riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
1845 if (symbol_type == SYMBOL_TLS_LE)
1846 return true;
1848 if (!TARGET_EXPLICIT_RELOCS)
1849 return false;
1851 return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
1854 /* Return true if a LO_SUM can address a value of mode MODE when the
1855 LO_SUM symbol has type SYM_TYPE. X is the LO_SUM second operand, which
1856 is used when the mode is BLKmode. */
1858 static bool
1859 riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, machine_mode mode,
1860 rtx x)
1862 int align, size;
1864 /* Check that symbols of type SYMBOL_TYPE can be used to access values
1865 of mode MODE. */
1866 if (riscv_symbol_insns (sym_type) == 0)
1867 return false;
1869 /* Check that there is a known low-part relocation. */
1870 if (!riscv_split_symbol_type (sym_type))
1871 return false;
1873 /* We can't tell size or alignment when we have BLKmode, so try extracting a
1874 decl from the symbol if possible. */
1875 if (mode == BLKmode)
1877 rtx offset;
1879 /* Extract the symbol from the LO_SUM operand, if any. */
1880 split_const (x, &x, &offset);
1882 /* Might be a CODE_LABEL. We can compute align but not size for that,
1883 so don't bother trying to handle it. */
1884 if (!SYMBOL_REF_P (x))
1885 return false;
1887 /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL. */
1888 align = (SYMBOL_REF_DECL (x)
1889 ? DECL_ALIGN (SYMBOL_REF_DECL (x))
1890 : 1);
1891 size = (SYMBOL_REF_DECL (x)
1892 && DECL_SIZE (SYMBOL_REF_DECL (x))
1893 && tree_fits_uhwi_p (DECL_SIZE (SYMBOL_REF_DECL (x)))
1894 ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x)))
1895 : 2*BITS_PER_WORD);
1897 else
1899 align = GET_MODE_ALIGNMENT (mode);
1900 size = GET_MODE_BITSIZE (mode).to_constant ();
1903 /* We may need to split multiword moves, so make sure that each word
1904 can be accessed without inducing a carry. */
1905 if (size > BITS_PER_WORD
1906 && (!TARGET_STRICT_ALIGN || size > align))
1907 return false;
1909 return true;
1912 /* Return true if MODE is an enabled RVV vector mode.
1913 For example, when MIN_VLEN == 32, 'RVVMF2SI' mode is disabled
1914 whereas 'RVVM1SI' mode is enabled. */
1916 bool
1917 riscv_v_ext_vector_mode_p (machine_mode mode)
1919 #define ENTRY(MODE, REQUIREMENT, ...) \
1920 case MODE##mode: \
1921 return REQUIREMENT;
1922 switch (mode)
1924 #include "riscv-vector-switch.def"
1925 default:
1926 return false;
1929 return false;
1932 /* Return true if MODE is an enabled RVV tuple mode. */
1934 bool
1935 riscv_v_ext_tuple_mode_p (machine_mode mode)
1937 #define TUPLE_ENTRY(MODE, REQUIREMENT, ...) \
1938 case MODE##mode: \
1939 return REQUIREMENT;
1940 switch (mode)
1942 #include "riscv-vector-switch.def"
1943 default:
1944 return false;
1947 return false;
1950 /* Return true if MODE is an enabled RVV VLS mode. */
1952 bool
1953 riscv_v_ext_vls_mode_p (machine_mode mode)
1955 #define VLS_ENTRY(MODE, REQUIREMENT) \
1956 case MODE##mode: \
1957 return REQUIREMENT;
1958 switch (mode)
1960 #include "riscv-vector-switch.def"
1961 default:
1962 return false;
1965 return false;
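/* The three predicates above share an X-macro pattern: each defines
   ENTRY/TUPLE_ENTRY/VLS_ENTRY and re-includes riscv-vector-switch.def
   so that every mode listed there becomes a switch case.  A rough
   sketch of the expansion, assuming a .def entry shaped like
   ENTRY (RVVM1SI, TARGET_MIN_VLEN >= 32, ...):

     switch (mode)
       {
       case RVVM1SImode:
         return TARGET_MIN_VLEN >= 32;
       default:
         return false;
       }  */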
1968 /* Return true if MODE is any of the modes below.
1969 1. RVV vector mode.
1970 2. RVV tuple mode.
1971 3. RVV vls mode. */
1973 static bool
1974 riscv_v_ext_mode_p (machine_mode mode)
1976 return riscv_v_ext_vector_mode_p (mode) || riscv_v_ext_tuple_mode_p (mode)
1977 || riscv_v_ext_vls_mode_p (mode);
1980 static unsigned
1981 riscv_v_vls_mode_aggregate_gpr_count (unsigned vls_unit_size,
1982 unsigned scalar_unit_size)
1984 gcc_assert (vls_unit_size != 0 && scalar_unit_size != 0);
1986 if (vls_unit_size < scalar_unit_size)
1987 return 1;
1989 /* Ensure the VLS mode size is exactly divisible by scalar_unit_size. */
1990 gcc_assert ((vls_unit_size % scalar_unit_size) == 0);
1992 return vls_unit_size / scalar_unit_size;
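/* For example: a 16-byte VLS value passed with 8-byte GPRs needs
   16 / 8 = 2 registers, while any VLS value smaller than one GPR
   still occupies a single register.  */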
1995 static machine_mode
1996 riscv_v_vls_to_gpr_mode (unsigned vls_mode_size)
1998 switch (vls_mode_size)
2000 case 16:
2001 return TImode;
2002 case 8:
2003 return DImode;
2004 case 4:
2005 return SImode;
2006 case 2:
2007 return HImode;
2008 case 1:
2009 return QImode;
2010 default:
2011 gcc_unreachable ();
2015 /* Called from ADJUST_NUNITS in riscv-modes.def. Return the correct
2016 NUNITS for the corresponding machine_mode. */
2018 poly_int64
2019 riscv_v_adjust_nunits (machine_mode mode, int scale)
2021 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
2022 if (riscv_v_ext_mode_p (mode))
2024 if (TARGET_MIN_VLEN == 32)
2025 scale = scale / 2;
2026 return riscv_vector_chunks * scale;
2028 return scale;
2031 /* Called from ADJUST_NUNITS in riscv-modes.def. Return the correct
2032 NUNITS for the corresponding machine_mode. */
2034 poly_int64
2035 riscv_v_adjust_nunits (machine_mode mode, bool fractional_p, int lmul, int nf)
2037 if (riscv_v_ext_mode_p (mode))
2039 scalar_mode smode = GET_MODE_INNER (mode);
2040 int size = GET_MODE_SIZE (smode);
2041 int nunits_per_chunk = riscv_bytes_per_vector_chunk / size;
2042 if (fractional_p)
2043 return nunits_per_chunk / lmul * riscv_vector_chunks * nf;
2044 else
2045 return nunits_per_chunk * lmul * riscv_vector_chunks * nf;
2047 /* Set the size of disabled RVV modes to 1 by default. */
2048 return 1;
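/* A worked example (illustrative): with 4-byte elements, 8-byte chunks
   (riscv_bytes_per_vector_chunk == 8), LMUL == 2 and NF == 1, the
   result is (8 / 4) * 2 * riscv_vector_chunks = 4 * riscv_vector_chunks
   units; a fractional LMUL divides by LMUL instead of multiplying.  */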
2051 /* Called from ADJUST_BYTESIZE in riscv-modes.def. Return the correct
2052 BYTE size for the corresponding machine_mode. */
2054 poly_int64
2055 riscv_v_adjust_bytesize (machine_mode mode, int scale)
2057 if (riscv_v_ext_vector_mode_p (mode))
2059 if (TARGET_XTHEADVECTOR)
2060 return BYTES_PER_RISCV_VECTOR;
2062 poly_int64 nunits = GET_MODE_NUNITS (mode);
2064 if (nunits.coeffs[0] > 8)
2065 return exact_div (nunits, 8);
2066 else if (nunits.is_constant ())
2067 return 1;
2068 else
2069 return poly_int64 (1, 1);
2072 return scale;
2075 /* Called from ADJUST_PRECISION in riscv-modes.def. Return the correct
2076 PRECISION for the corresponding machine_mode. */
2078 poly_int64
2079 riscv_v_adjust_precision (machine_mode mode, int scale)
2081 return riscv_v_adjust_nunits (mode, scale);
2084 /* Return true if X is a valid address for machine mode MODE. If it is,
2085 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2086 effect. */
2088 static bool
2089 riscv_classify_address (struct riscv_address_info *info, rtx x,
2090 machine_mode mode, bool strict_p)
2092 if (th_classify_address (info, x, mode, strict_p))
2093 return true;
2095 switch (GET_CODE (x))
2097 case REG:
2098 case SUBREG:
2099 info->type = ADDRESS_REG;
2100 info->reg = x;
2101 info->offset = const0_rtx;
2102 return riscv_valid_base_register_p (info->reg, mode, strict_p);
2104 case PLUS:
2105 /* RVV load/store disallow any offset. */
2106 if (riscv_v_ext_mode_p (mode))
2107 return false;
2109 info->type = ADDRESS_REG;
2110 info->reg = XEXP (x, 0);
2111 info->offset = XEXP (x, 1);
2112 return (riscv_valid_base_register_p (info->reg, mode, strict_p)
2113 && riscv_valid_offset_p (info->offset, mode));
2115 case LO_SUM:
2116 /* RVV load/store disallow LO_SUM. */
2117 if (riscv_v_ext_mode_p (mode))
2118 return false;
2120 info->type = ADDRESS_LO_SUM;
2121 info->reg = XEXP (x, 0);
2122 info->offset = XEXP (x, 1);
2123 /* We have to trust the creator of the LO_SUM to do something vaguely
2124 sane. Target-independent code that creates a LO_SUM should also
2125 create and verify the matching HIGH. Target-independent code that
2126 adds an offset to a LO_SUM must prove that the offset will not
2127 induce a carry. Failure to do either of these things would be
2128 a bug, and we are not required to check for it here. The RISC-V
2129 backend itself should only create LO_SUMs for valid symbolic
2130 constants, with the high part being either a HIGH or a copy
2131 of _gp. */
2132 info->symbol_type
2133 = riscv_classify_symbolic_expression (info->offset);
2134 return (riscv_valid_base_register_p (info->reg, mode, strict_p)
2135 && riscv_valid_lo_sum_p (info->symbol_type, mode, info->offset));
2137 case CONST_INT:
2138 /* We only allow the const0_rtx for the RVV load/store. For example:
2139 +----------------------------------------------------------+
2140 | li a5,0 |
2141 | vsetvli zero,a1,e32,m1,ta,ma |
2142 | vle32.v v24,0(a5) <- propagate the const 0 to a5 here. |
2143 | vs1r.v v24,0(a0) |
2144 +----------------------------------------------------------+
2145 It can be folded to:
2146 +----------------------------------------------------------+
2147 | vsetvli zero,a1,e32,m1,ta,ma |
2148 | vle32.v v24,0(zero) |
2149 | vs1r.v v24,0(a0) |
2150 +----------------------------------------------------------+
2151 This behavior benefits the underlying RVV auto-vectorization. */
2152 if (riscv_v_ext_mode_p (mode))
2153 return x == const0_rtx;
2155 /* Small-integer addresses don't occur very often, but they
2156 are legitimate if x0 is a valid base register. */
2157 info->type = ADDRESS_CONST_INT;
2158 return SMALL_OPERAND (INTVAL (x));
2160 default:
2161 return false;
2165 /* Implement TARGET_LEGITIMATE_ADDRESS_P. */
2167 static bool
2168 riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
2169 code_helper = ERROR_MARK)
2171 /* Disallow RVV modes base address.
2172 E.g. (mem:SI (subreg:DI (reg:V1DI 155) 0). */
2173 if (SUBREG_P (x) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (x))))
2174 return false;
2175 struct riscv_address_info addr;
2177 return riscv_classify_address (&addr, x, mode, strict_p);
2180 /* Return true if hard reg REGNO can be used in compressed instructions. */
2182 static bool
2183 riscv_compressed_reg_p (int regno)
2185 /* x8-x15/f8-f15 are compressible registers. */
2186 return ((TARGET_RVC || TARGET_ZCA)
2187 && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
2188 || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)));
2191 /* Return true if x is an unsigned 5-bit immediate scaled by 4. */
2193 static bool
2194 riscv_compressed_lw_offset_p (rtx x)
2196 return (CONST_INT_P (x)
2197 && (INTVAL (x) & 3) == 0
2198 && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET));
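/* For instance, assuming CSW_MAX_OFFSET is 124 (a 5-bit c.lw/c.sw
   offset field scaled by 4), the offsets 0, 4, ..., 124 are accepted
   above, while 126 (misaligned) and 128 (out of range) are rejected.  */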
2201 /* Return true if load/store from/to address x can be compressed. */
2203 static bool
2204 riscv_compressed_lw_address_p (rtx x)
2206 struct riscv_address_info addr;
2207 bool result = riscv_classify_address (&addr, x, GET_MODE (x),
2208 reload_completed);
2210 /* Return false if address is not compressed_reg + small_offset. */
2211 if (!result
2212 || addr.type != ADDRESS_REG
2213 /* Before reload, assume all registers are OK. */
2214 || (reload_completed
2215 && !riscv_compressed_reg_p (REGNO (addr.reg))
2216 && addr.reg != stack_pointer_rtx)
2217 || !riscv_compressed_lw_offset_p (addr.offset))
2218 return false;
2220 return result;
2223 /* Return the number of instructions needed to load or store a value
2224 of mode MODE at address X. Return 0 if X isn't valid for MODE.
2225 Assume that multiword moves may need to be split into word moves
2226 if MIGHT_SPLIT_P, otherwise assume that a single load or store is
2227 enough. */
2230 riscv_address_insns (rtx x, machine_mode mode, bool might_split_p)
2232 struct riscv_address_info addr = {};
2233 int n = 1;
2235 if (!riscv_classify_address (&addr, x, mode, false))
2237 /* This could be a pattern from the pic.md file, in which case we want
2238 this address to always have a cost of 3 to make it as expensive as the
2239 most expensive symbol. This prevents constant propagation from
2240 preferring symbols over register-plus-offset. */
2241 return 3;
2244 /* BLKmode is used for single unaligned loads and stores and should
2245 not count as a multiword mode. */
2246 if (!riscv_v_ext_vector_mode_p (mode) && mode != BLKmode && might_split_p)
2247 n += (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2249 if (addr.type == ADDRESS_LO_SUM)
2250 n += riscv_symbol_insns (addr.symbol_type) - 1;
2252 return n;
2255 /* Return the number of instructions needed to load constant X.
2256 Return 0 if X isn't a valid constant.
2258 ALLOW_NEW_PSEUDOS controls whether or not we're going to be allowed
2259 to create new pseudos. It must be FALSE for any call directly or
2260 indirectly from a pattern's condition. */
2263 riscv_const_insns (rtx x, bool allow_new_pseudos)
2265 enum riscv_symbol_type symbol_type;
2266 rtx offset;
2268 switch (GET_CODE (x))
2270 case HIGH:
2271 if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
2272 || !riscv_split_symbol_type (symbol_type))
2273 return 0;
2275 /* This is simply an LUI. */
2276 return 1;
2278 case CONST_INT:
2280 int cost = riscv_integer_cost (INTVAL (x), allow_new_pseudos);
2281 /* Force complicated constants to memory. */
2282 return cost < 4 ? cost : 0;
2285 case CONST_DOUBLE:
2286 /* See if we can use FMV directly. */
2287 if (satisfies_constraint_zfli (x))
2288 return 1;
2290 /* We can use x0 to load floating-point zero. */
2291 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2292 case CONST_VECTOR:
2294 /* TODO: This is not accurate, we will need to
2295 adapt the COST of CONST_VECTOR in the future
2296 for the following cases:
2298 - 1. const duplicate vector with element value
2299 in range of [-16, 15].
2300 - 2. const duplicate vector with element value
2301 out range of [-16, 15].
2302 - 3. const series vector.
2303 ...etc. */
2304 if (riscv_v_ext_mode_p (GET_MODE (x)))
2306 rtx elt;
2307 if (const_vec_duplicate_p (x, &elt))
2309 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL)
2310 /* Duplicate values of 0/1 can be emitted using vmv.v.i. */
2311 return 1;
2313 /* We don't allow a CONST_VECTOR with DI elements on an RV32
2314 system, since the element constant cannot be held within a
2315 single register; this keeps reload from turning a DI-register
2316 vec_duplicate into vmv.v.x. */
2317 scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
2318 if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
2319 && !immediate_operand (elt, Pmode))
2320 return 0;
2321 /* Integer constants in the range [-16, 15], or the floating-point
2322 constant 0.0, can be emitted using vmv.v.i. */
2323 if (valid_vec_immediate_p (x))
2324 return 1;
2326 /* Any int/FP constants can always be broadcast from a
2327 scalar register. Loading of a floating-point
2328 constant incurs a literal-pool access. Allow this in
2329 order to increase vectorization possibilities. */
2330 int n = riscv_const_insns (elt, allow_new_pseudos);
2331 if (CONST_DOUBLE_P (elt))
2332 return 1 + 4; /* vfmv.v.f + memory access. */
2333 else
2335 /* We need as many insns as it takes to load the constant
2336 into a GPR and one vmv.v.x. */
2337 if (n != 0)
2338 return 1 + n;
2339 else
2340 return 1 + 4; /* vmv.v.x + memory access. */
2344 /* const series vector. */
2345 rtx base, step;
2346 if (const_vec_series_p (x, &base, &step))
2348 /* This cost is not accurate; we will need to adapt the COST
2349 according to BASE and STEP. */
2350 return 1;
2353 if (CONST_VECTOR_STEPPED_P (x))
2355 /* Some cases are unhandled so we need construct a builder to
2356 detect/allow those cases to be handled by the fallthrough
2357 handler. */
2358 unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
2359 unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
2360 rvv_builder builder (GET_MODE (x), npatterns, nelts_per_pattern);
2361 for (unsigned int i = 0; i < nelts_per_pattern; i++)
2363 for (unsigned int j = 0; j < npatterns; j++)
2364 builder.quick_push (CONST_VECTOR_ELT (x, i * npatterns + j));
2366 builder.finalize ();
2368 if (builder.single_step_npatterns_p ())
2370 if (builder.npatterns_all_equal_p ())
2372 /* TODO: This cost is not accurate. */
2373 return 1;
2375 else
2377 /* TODO: This cost is not accurate. */
2378 return 1;
2381 else if (builder.interleaved_stepped_npatterns_p ())
2383 /* TODO: This cost is not accurate. */
2384 return 1;
2387 /* Fallthrough. */
2391 /* TODO: We may support more const vectors in the future. */
2392 return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
2395 case CONST:
2396 /* See if we can refer to X directly. */
2397 if (riscv_symbolic_constant_p (x, &symbol_type))
2398 return riscv_symbol_insns (symbol_type);
2400 /* Otherwise try splitting the constant into a base and offset. */
2401 split_const (x, &x, &offset);
2402 if (offset != 0)
2404 int n = riscv_const_insns (x, allow_new_pseudos);
2405 if (n != 0)
2406 return n + riscv_integer_cost (INTVAL (offset), allow_new_pseudos);
2408 return 0;
2410 case SYMBOL_REF:
2411 case LABEL_REF:
2412 return riscv_symbol_insns (riscv_classify_symbol (x));
2414 /* TODO: In RVV, a CONST_POLY_INT is materialized with a csrr of
2415 VLENB and several scalar shift or mult instructions, so its exact
2416 cost is so far unknown. We set it to 4 temporarily. */
2417 case CONST_POLY_INT:
2418 return 4;
2420 default:
2421 return 0;
2425 /* X is a doubleword constant that can be handled by splitting it into
2426 two words and loading each word separately. Return the number of
2427 instructions required to do this. */
2430 riscv_split_const_insns (rtx x)
2432 unsigned int low, high;
2434 /* This is not called from pattern conditions, so we can let
2435 our location in the RTL pipeline control whether or not
2436 new pseudos are created. */
2437 bool allow_new_pseudos = can_create_pseudo_p ();
2438 low = riscv_const_insns (riscv_subword (x, false), allow_new_pseudos);
2439 high = riscv_const_insns (riscv_subword (x, true), allow_new_pseudos);
2440 gcc_assert (low > 0 && high > 0);
2441 return low + high;
2444 /* Return the number of instructions needed to implement INSN,
2445 given that it loads from or stores to MEM. */
2448 riscv_load_store_insns (rtx mem, rtx_insn *insn)
2450 machine_mode mode;
2451 bool might_split_p;
2452 rtx set;
2454 gcc_assert (MEM_P (mem));
2455 mode = GET_MODE (mem);
2457 /* Try to prove that INSN does not need to be split. */
2458 might_split_p = true;
2459 if (GET_MODE_BITSIZE (mode).to_constant () <= 32)
2460 might_split_p = false;
2461 else if (GET_MODE_BITSIZE (mode).to_constant () == 64)
2463 set = single_set (insn);
2464 if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
2465 might_split_p = false;
2468 return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
2471 /* Emit a move from SRC to DEST. Assume that the move expanders can
2472 handle all moves if !can_create_pseudo_p (). The distinction is
2473 important because, unlike emit_move_insn, the move expanders know
2474 how to force Pmode objects into the constant pool even when the
2475 constant pool address is not itself legitimate. */
2478 riscv_emit_move (rtx dest, rtx src)
2480 return (can_create_pseudo_p ()
2481 ? emit_move_insn (dest, src)
2482 : emit_move_insn_1 (dest, src));
2485 /* Emit an instruction of the form (set TARGET SRC). */
2487 static rtx
2488 riscv_emit_set (rtx target, rtx src)
2490 emit_insn (gen_rtx_SET (target, src));
2491 return target;
2494 /* Emit an instruction of the form (set DEST (CODE X)). */
2497 riscv_emit_unary (enum rtx_code code, rtx dest, rtx x)
2499 return riscv_emit_set (dest, gen_rtx_fmt_e (code, GET_MODE (dest), x));
2502 /* Emit an instruction of the form (set DEST (CODE X Y)). */
2505 riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
2507 return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
2510 /* Compute (CODE X Y) and store the result in a new register
2511 of mode MODE. Return that new register. */
2513 static rtx
2514 riscv_force_binary (machine_mode mode, enum rtx_code code, rtx x, rtx y)
2516 return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
2519 static rtx
2520 riscv_swap_instruction (rtx inst)
2522 gcc_assert (GET_MODE (inst) == SImode);
2523 if (BYTES_BIG_ENDIAN)
2524 inst = expand_unop (SImode, bswap_optab, inst, gen_reg_rtx (SImode), 1);
2525 return inst;
2528 /* Copy VALUE to a register and return that register. If new pseudos
2529 are allowed, copy it into a new register, otherwise use DEST. */
2531 static rtx
2532 riscv_force_temporary (rtx dest, rtx value)
2534 if (can_create_pseudo_p ())
2535 return force_reg (Pmode, value);
2536 else
2538 riscv_emit_move (dest, value);
2539 return dest;
2543 /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
2544 then add CONST_INT OFFSET to the result. */
2546 static rtx
2547 riscv_unspec_address_offset (rtx base, rtx offset,
2548 enum riscv_symbol_type symbol_type)
2550 base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
2551 UNSPEC_ADDRESS_FIRST + symbol_type);
2552 if (offset != const0_rtx)
2553 base = gen_rtx_PLUS (Pmode, base, offset);
2554 return gen_rtx_CONST (Pmode, base);
2557 /* Return an UNSPEC address with underlying address ADDRESS and symbol
2558 type SYMBOL_TYPE. */
2561 riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
2563 rtx base, offset;
2565 split_const (address, &base, &offset);
2566 return riscv_unspec_address_offset (base, offset, symbol_type);
2569 /* If OP is an UNSPEC address, return the address to which it refers,
2570 otherwise return OP itself. */
2572 static rtx
2573 riscv_strip_unspec_address (rtx op)
2575 rtx base, offset;
2577 split_const (op, &base, &offset);
2578 if (UNSPEC_ADDRESS_P (base))
2579 op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
2580 return op;
2583 /* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
2584 high part to BASE and return the result. Just return BASE otherwise.
2585 TEMP is as for riscv_force_temporary.
2587 The returned expression can be used as the first operand to a LO_SUM. */
2589 static rtx
2590 riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
2592 addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
2593 return riscv_force_temporary (temp, addr);
2596 /* Load an entry from the GOT for a TLS GD access. */
2598 static rtx riscv_got_load_tls_gd (rtx dest, rtx sym)
2600 if (Pmode == DImode)
2601 return gen_got_load_tls_gddi (dest, sym);
2602 else
2603 return gen_got_load_tls_gdsi (dest, sym);
2606 /* Load an entry from the GOT for a TLS IE access. */
2608 static rtx riscv_got_load_tls_ie (rtx dest, rtx sym)
2610 if (Pmode == DImode)
2611 return gen_got_load_tls_iedi (dest, sym);
2612 else
2613 return gen_got_load_tls_iesi (dest, sym);
2616 /* Add in the thread pointer for a TLS LE access. */
2618 static rtx riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
2620 rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2621 if (Pmode == DImode)
2622 return gen_tls_add_tp_ledi (dest, base, tp, sym);
2623 else
2624 return gen_tls_add_tp_lesi (dest, base, tp, sym);
2627 /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
2628 it appears in a MEM of that mode. Return true if ADDR is a legitimate
2629 constant in that context and can be split into high and low parts.
2630 If so, and if LOW_OUT is nonnull, emit the high part and store the
2631 low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
2633 TEMP is as for riscv_force_temporary and is used to load the high
2634 part into a register.
2636 When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
2637 a legitimate SET_SRC for an .md pattern, otherwise the low part
2638 is guaranteed to be a legitimate address for mode MODE. */
2640 bool
2641 riscv_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
2643 enum riscv_symbol_type symbol_type;
2645 if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
2646 || !riscv_symbolic_constant_p (addr, &symbol_type)
2647 || riscv_symbol_insns (symbol_type) == 0
2648 || !riscv_split_symbol_type (symbol_type))
2649 return false;
2651 if (low_out)
2652 switch (symbol_type)
2654 case SYMBOL_FORCE_TO_MEM:
2655 return false;
2657 case SYMBOL_ABSOLUTE:
2659 rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
2660 high = riscv_force_temporary (temp, high);
2661 *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
2663 break;
2665 case SYMBOL_PCREL:
2667 static unsigned seqno;
2668 char buf[32];
2669 rtx label;
2671 ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
2672 gcc_assert ((size_t) bytes < sizeof (buf));
2674 label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
2675 SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;
2676 /* ??? Ugly hack to make weak symbols work. May need to change the
2677 RTL for the auipc and/or low patterns to get a better fix for
2678 this. */
2679 if (! nonzero_address_p (addr))
2680 SYMBOL_REF_WEAK (label) = 1;
2682 if (temp == NULL)
2683 temp = gen_reg_rtx (Pmode);
2685 if (Pmode == DImode)
2686 emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
2687 else
2688 emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));
2690 *low_out = gen_rtx_LO_SUM (Pmode, temp, label);
2692 seqno++;
2694 break;
2696 default:
2697 gcc_unreachable ();
2700 return true;
2703 /* Return a legitimate address for REG + OFFSET. TEMP is as for
2704 riscv_force_temporary; it is only needed when OFFSET is not a
2705 SMALL_OPERAND. */
2707 static rtx
2708 riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
2710 if (!SMALL_OPERAND (offset))
2712 rtx high;
2714 /* Leave OFFSET as a signed 12-bit offset and put the excess in HIGH.
2715 The addition inside the macro CONST_HIGH_PART may cause an
2716 overflow, so we need to force a sign-extension check. */
2717 high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
2718 offset = CONST_LOW_PART (offset);
2719 high = riscv_force_temporary (temp, high);
2720 reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
2722 return plus_constant (Pmode, reg, offset);
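/* A worked example (illustrative): for OFFSET == 0x12345,
   CONST_HIGH_PART rounds to the nearest 0x1000 boundary (the 0x800 bias
   keeps the low part sign-extendable), giving HIGH == 0x12000 and a low
   part of 0x345; the result is (REG + 0x12000) + 0x345, i.e. an lui/add
   pair followed by a 12-bit addend.  */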
2725 /* The __tls_get_addr symbol. */
2726 static GTY(()) rtx riscv_tls_symbol;
2728 /* Return an instruction sequence that calls __tls_get_addr. SYM is
2729 the TLS symbol we are referencing, accessed through the
2730 global-dynamic GOT load. RESULT is an RTX for the
2731 return value location. */
2733 static rtx_insn *
2734 riscv_call_tls_get_addr (rtx sym, rtx result)
2736 rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
2737 rtx_insn *insn;
2739 if (!riscv_tls_symbol)
2740 riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
2741 func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);
2743 start_sequence ();
2745 emit_insn (riscv_got_load_tls_gd (a0, sym));
2746 insn = emit_call_insn (gen_call_value (result, func, const0_rtx,
2747 gen_int_mode (RISCV_CC_BASE, SImode)));
2748 RTL_CONST_CALL_P (insn) = 1;
2749 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
2750 insn = get_insns ();
2752 end_sequence ();
2754 return insn;
2757 /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
2758 its address. The return value will be both a valid address and a valid
2759 SET_SRC (either a REG or a LO_SUM). */
2761 static rtx
2762 riscv_legitimize_tls_address (rtx loc)
2764 rtx dest, tp, tmp, a0;
2765 enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);
2767 #if 0
2768 /* TLS copy relocs are now deprecated and should not be used. */
2769 /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE. */
2770 if (!flag_pic)
2771 model = TLS_MODEL_LOCAL_EXEC;
2772 #endif
2774 switch (model)
2776 case TLS_MODEL_LOCAL_DYNAMIC:
2777 /* Rely on section anchors for the optimization that LDM TLS
2778 provides. The anchor's address is loaded with GD TLS. */
2779 case TLS_MODEL_GLOBAL_DYNAMIC:
2780 if (TARGET_TLSDESC)
2782 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2783 a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
2784 dest = gen_reg_rtx (Pmode);
2786 emit_insn (gen_tlsdesc (Pmode, loc));
2787 emit_insn (gen_add3_insn (dest, a0, tp));
2789 else
2791 tmp = gen_rtx_REG (Pmode, GP_RETURN);
2792 dest = gen_reg_rtx (Pmode);
2793 emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp,
2794 loc);
2796 break;
2798 case TLS_MODEL_INITIAL_EXEC:
2799 /* la.tls.ie; tp-relative add */
2800 tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
2801 tmp = gen_reg_rtx (Pmode);
2802 emit_insn (riscv_got_load_tls_ie (tmp, loc));
2803 dest = gen_reg_rtx (Pmode);
2804 emit_insn (gen_add3_insn (dest, tmp, tp));
2805 break;
2807 case TLS_MODEL_LOCAL_EXEC:
2808 tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
2809 dest = gen_reg_rtx (Pmode);
2810 emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
2811 dest = gen_rtx_LO_SUM (Pmode, dest,
2812 riscv_unspec_address (loc, SYMBOL_TLS_LE));
2813 break;
2815 default:
2816 gcc_unreachable ();
2818 return dest;
2821 /* If X is not a valid address for mode MODE, force it into a register. */
2823 static rtx
2824 riscv_force_address (rtx x, machine_mode mode)
2826 if (!riscv_legitimate_address_p (mode, x, false))
2828 if (can_create_pseudo_p ())
2829 return force_reg (Pmode, x);
2830 else
2832 /* It's only safe for the thunk function.
2833 Use ra as the temp register. */
2834 gcc_assert (riscv_in_thunk_func);
2835 rtx reg = RISCV_PROLOGUE_TEMP2 (Pmode);
2836 riscv_emit_move (reg, x);
2837 return reg;
2841 return x;
2844 /* Modify base + offset so that offset fits within a compressed load/store insn
2845 and the excess is added to base. */
2847 static rtx
2848 riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset)
2850 rtx addr, high;
2851 /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess
2852 into HIGH. */
2853 high = GEN_INT (offset & ~CSW_MAX_OFFSET);
2854 offset &= CSW_MAX_OFFSET;
2855 if (!SMALL_OPERAND (INTVAL (high)))
2856 high = force_reg (Pmode, high);
2857 base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base));
2858 addr = plus_constant (Pmode, base, offset);
2859 return addr;
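/* For example (illustrative, assuming CSW_MAX_OFFSET is 124): BASE + 132
   becomes NEW_BASE = BASE + 128 with a residual offset of 4, which fits
   the compressed load/store offset field.  */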
2862 /* Helper for riscv_legitimize_address. Given X, return true if it
2863 is a left shift by 1, 2 or 3 positions or a multiply by 2, 4 or 8.
2865 These respectively represent canonical shift-add rtxs or scaled
2866 memory addresses. */
2867 static bool
2868 mem_shadd_or_shadd_rtx_p (rtx x)
2870 return ((GET_CODE (x) == ASHIFT
2871 || GET_CODE (x) == MULT)
2872 && register_operand (XEXP (x, 0), GET_MODE (x))
2873 && CONST_INT_P (XEXP (x, 1))
2874 && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
2875 || (GET_CODE (x) == MULT
2876 && IN_RANGE (exact_log2 (INTVAL (XEXP (x, 1))), 1, 3))));
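/* E.g., both (ashift (reg) (const_int 2)) and (mult (reg) (const_int 4))
   satisfy this predicate; they are the shift-add and scaled-address
   forms of multiplying an index by 4.  */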
2879 /* This function is used to implement LEGITIMIZE_ADDRESS. If X can
2880 be legitimized in a way that the generic machinery might not expect,
2881 return a new address, otherwise return NULL. MODE is the mode of
2882 the memory being accessed. */
2884 static rtx
2885 riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2886 machine_mode mode)
2888 rtx addr;
2890 if (riscv_tls_symbol_p (x))
2891 return riscv_legitimize_tls_address (x);
2893 /* See if the address can split into a high part and a LO_SUM. */
2894 if (riscv_split_symbol (NULL, x, mode, &addr))
2895 return riscv_force_address (addr, mode);
2897 /* Handle BASE + OFFSET. */
2898 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
2899 && INTVAL (XEXP (x, 1)) != 0)
2901 rtx base = XEXP (x, 0);
2902 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
2904 /* Handle (plus (plus (mult (a) (mem_shadd_constant)) (fp)) (C)) case. */
2905 if (GET_CODE (base) == PLUS && mem_shadd_or_shadd_rtx_p (XEXP (base, 0))
2906 && SMALL_OPERAND (offset))
2908 rtx index = XEXP (base, 0);
2909 rtx fp = XEXP (base, 1);
2910 if (REG_P (fp) && REGNO (fp) == VIRTUAL_STACK_VARS_REGNUM)
2913 /* If we were given a MULT, we must fix the constant
2914 as we're going to create the ASHIFT form. */
2915 int shift_val = INTVAL (XEXP (index, 1));
2916 if (GET_CODE (index) == MULT)
2917 shift_val = exact_log2 (shift_val);
2919 rtx reg1 = gen_reg_rtx (Pmode);
2920 rtx reg2 = gen_reg_rtx (Pmode);
2921 rtx reg3 = gen_reg_rtx (Pmode);
2922 riscv_emit_binary (PLUS, reg1, fp, GEN_INT (offset));
2923 riscv_emit_binary (ASHIFT, reg2, XEXP (index, 0), GEN_INT (shift_val));
2924 riscv_emit_binary (PLUS, reg3, reg2, reg1);
2926 return reg3;
2930 if (!riscv_valid_base_register_p (base, mode, false))
2931 base = copy_to_mode_reg (Pmode, base);
2932 if (optimize_function_for_size_p (cfun)
2933 && (strcmp (current_pass->name, "shorten_memrefs") == 0)
2934 && mode == SImode)
2935 /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow
2936 a compressed load/store where possible. */
2937 addr = riscv_shorten_lw_offset (base, offset);
2938 else
2939 addr = riscv_add_offset (NULL, base, offset);
2940 return riscv_force_address (addr, mode);
2943 return x;
2946 /* Load VALUE into DEST. TEMP is as for riscv_force_temporary. ORIG_MODE
2947 is the original src mode before promotion. */
2949 void
2950 riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
2951 machine_mode orig_mode)
2953 struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
2954 machine_mode mode;
2955 int i, num_ops;
2956 rtx x = NULL_RTX;
2958 mode = GET_MODE (dest);
2959 /* We use the original mode for the riscv_build_integer call, because HImode
2960 values are given special treatment. */
2961 num_ops = riscv_build_integer (codes, value, orig_mode, can_create_pseudo_p ());
2963 if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
2964 && num_ops >= riscv_split_integer_cost (value))
2965 x = riscv_split_integer (value, mode);
2966 else
2968 rtx old_value = NULL_RTX;
2969 for (i = 0; i < num_ops; i++)
2971 if (i != 0 && !can_create_pseudo_p ())
2972 x = riscv_emit_set (temp, x);
2973 else if (i != 0)
2974 x = force_reg (mode, x);
2975 codes[i].value = trunc_int_for_mode (codes[i].value, mode);
2976 if (codes[i].code == UNKNOWN)
2978 /* UNKNOWN means load the constant value into X. */
2979 x = GEN_INT (codes[i].value);
2981 else if (codes[i].use_uw)
2983 /* If the sequence requires using a "uw" form of an insn, we're
2984 going to have to construct the RTL ourselves and put it in
2985 a register to keep force_reg/force_operand from mucking
2986 things up. */
2987 gcc_assert (TARGET_64BIT || TARGET_ZBA);
2988 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
2990 /* Create the proper mask for the slli.uw instruction. */
2991 unsigned HOST_WIDE_INT value = 0xffffffff;
2992 value <<= codes[i].value;
2994 /* Right now the only "uw" form we use is slli, we may add more
2995 in the future. */
2996 x = gen_rtx_fmt_ee (codes[i].code, mode,
2997 x, GEN_INT (codes[i].value));
2998 x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
2999 x = riscv_emit_set (t, x);
3001 else if (codes[i].code == FMA)
3003 HOST_WIDE_INT value = exact_log2 (codes[i].value - 1);
3004 rtx ashift = gen_rtx_fmt_ee (ASHIFT, mode, x, GEN_INT (value));
3005 x = gen_rtx_fmt_ee (PLUS, mode, ashift, x);
3006 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
3007 x = riscv_emit_set (t, x);
3009 else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
3011 if (codes[i].code == CONCAT && !TARGET_ZBKB)
3013 /* The two values should have no bits in common, so we can
3014 use PLUS instead of IOR which has a higher chance of
3015 using a compressed instruction. */
3016 x = gen_rtx_PLUS (mode, x, old_value);
3018 else
3020 rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
3021 rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
3022 gcc_assert (t2);
3023 t2 = gen_lowpart (SImode, t2);
3024 emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
3025 x = t;
3028 else
3029 x = gen_rtx_fmt_ee (codes[i].code, mode,
3030 x, GEN_INT (codes[i].value));
3032 /* If this entry in the code table indicates we should save away
3033 the temporary holding the current value of X, then do so. */
3034 if (codes[i].save_temporary)
3036 gcc_assert (old_value == NULL_RTX);
3037 x = force_reg (mode, x);
3038 old_value = x;
3043 riscv_emit_set (dest, x);
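/* A small worked example (illustrative): for VALUE == 0x12345678,
   riscv_build_integer would typically return two steps -- codes[0] with
   code == UNKNOWN loading the LUI constant 0x12345000, and codes[1]
   adding the low 12 bits 0x678 -- which the loop above replays as an
   lui/addi pair.  */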
3046 /* Subroutine of riscv_legitimize_move. Move constant SRC into register
3047 DEST given that SRC satisfies immediate_operand but doesn't satisfy
3048 move_operand. */
3050 static void
3051 riscv_legitimize_const_move (machine_mode mode, rtx dest, rtx src)
3053 rtx base, offset;
3055 /* Split moves of big integers into smaller pieces. */
3056 if (splittable_const_int_operand (src, mode))
3058 riscv_move_integer (dest, dest, INTVAL (src), mode);
3059 return;
3062 if (satisfies_constraint_zfli (src))
3064 riscv_emit_set (dest, src);
3065 return;
3068 /* Split moves of symbolic constants into high/low pairs. */
3069 if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
3071 riscv_emit_set (dest, src);
3072 return;
3075 /* Generate the appropriate access sequences for TLS symbols. */
3076 if (riscv_tls_symbol_p (src))
3078 riscv_emit_move (dest, riscv_legitimize_tls_address (src));
3079 return;
3082 /* If we have (const (plus symbol offset)), and that expression cannot
3083 be forced into memory, load the symbol first and add in the offset. Also
3084 prefer to do this even if the constant _can_ be forced into memory, as it
3085 usually produces better code. */
3086 split_const (src, &base, &offset);
3087 if (offset != const0_rtx
3088 && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
3090 base = riscv_force_temporary (dest, base);
3091 riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
3092 return;
3095 /* Handle the format below.
3096 (const:DI
3097 (plus:DI
3098 (symbol_ref:DI ("ic") [flags 0x2] <var_decl 0x7fe57740be10 ic>) <- op_0
3099 (const_poly_int:DI [16, 16]) // <- op_1
)) */
3102 if (GET_CODE (src) == CONST && GET_CODE (XEXP (src, 0)) == PLUS
3103 && CONST_POLY_INT_P (XEXP (XEXP (src, 0), 1)))
3105 rtx dest_tmp = gen_reg_rtx (mode);
3106 rtx tmp = gen_reg_rtx (mode);
3108 riscv_emit_move (dest, XEXP (XEXP (src, 0), 0));
3109 riscv_legitimize_poly_move (mode, dest_tmp, tmp, XEXP (XEXP (src, 0), 1));
3111 emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, dest, dest_tmp)));
3112 return;
3115 src = force_const_mem (mode, src);
3117 /* When using explicit relocs, constant pool references are sometimes
3118 not legitimate addresses. */
3119 riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
3120 riscv_emit_move (dest, src);
3123 /* Report when we try to do something that requires vector when vector is
3124 disabled. This is an error of last resort and isn't very high-quality. It
3125 usually involves attempts to measure the vector length in some way. */
3127 static void
3128 riscv_report_v_required (void)
3130 static bool reported_p = false;
3132 /* Avoid reporting a slew of messages for a single oversight. */
3133 if (reported_p)
3134 return;
3136 error ("this operation requires the RVV ISA extension");
3137 inform (input_location, "you can enable RVV using the command-line"
3138 " option %<-march%>, or by using the %<target%>"
3139 " attribute or pragma");
3140 reported_p = true;
3143 /* Helper function to emit an operation for rtx_code CODE. */
3144 static void
3145 riscv_expand_op (enum rtx_code code, machine_mode mode, rtx op0, rtx op1,
3146 rtx op2)
3148 if (can_create_pseudo_p ())
3150 rtx result;
3151 if (GET_RTX_CLASS (code) == RTX_UNARY)
3152 result = expand_simple_unop (mode, code, op1, NULL_RTX, false);
3153 else
3154 result = expand_simple_binop (mode, code, op1, op2, NULL_RTX, false,
3155 OPTAB_DIRECT);
3156 riscv_emit_move (op0, result);
3158 else
3160 rtx pat;
3161 /* The following implementation is for the prologue and epilogue;
3162 because the prologue and epilogue cannot use pseudo registers,
3163 we can't use expand_simple_binop or expand_simple_unop. */
3164 if (GET_RTX_CLASS (code) == RTX_UNARY)
3165 pat = gen_rtx_fmt_e (code, mode, op1);
3166 else
3167 pat = gen_rtx_fmt_ee (code, mode, op1, op2);
3168 emit_insn (gen_rtx_SET (op0, pat));
3172 /* Expand a mult operation with a constant integer; the multiplicand is
3173 also used as a temporary register. */
3175 static void
3176 riscv_expand_mult_with_const_int (machine_mode mode, rtx dest, rtx multiplicand,
3177 HOST_WIDE_INT multiplier)
3179 if (multiplier == 0)
3181 riscv_emit_move (dest, GEN_INT (0));
3182 return;
3185 bool neg_p = multiplier < 0;
3186 unsigned HOST_WIDE_INT multiplier_abs = abs (multiplier);
3188 if (multiplier_abs == 1)
3190 if (neg_p)
3191 riscv_expand_op (NEG, mode, dest, multiplicand, NULL_RTX);
3192 else
3193 riscv_emit_move (dest, multiplicand);
3195 else
3197 if (pow2p_hwi (multiplier_abs))
3200 multiplicand = [BYTES_PER_RISCV_VECTOR].
3201 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 8].
3202 Sequence:
3203 csrr a5, vlenb
3204 slli a5, a5, 3
3205 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 8].
3206 Sequence:
3207 csrr a5, vlenb
3208 slli a5, a5, 3
3209 neg a5, a5
3211 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
3212 gen_int_mode (exact_log2 (multiplier_abs), QImode));
3213 if (neg_p)
3214 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
3216 else if (pow2p_hwi (multiplier_abs + 1))
3219 multiplicand = [BYTES_PER_RISCV_VECTOR].
3220 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 7].
3221 Sequence:
3222 csrr a5, vlenb
3223 slli a4, a5, 3
3224 sub a5, a4, a5
3225 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 7].
3226 Sequence:
3227 csrr a5, vlenb
3228 slli a4, a5, 3
3229 sub a5, a4, a5 + neg a5, a5 => sub a5, a5, a4
3231 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
3232 gen_int_mode (exact_log2 (multiplier_abs + 1),
3233 QImode));
3234 if (neg_p)
3235 riscv_expand_op (MINUS, mode, dest, multiplicand, dest);
3236 else
3237 riscv_expand_op (MINUS, mode, dest, dest, multiplicand);
3239 else if (pow2p_hwi (multiplier_abs - 1))
3242 multiplicand = [BYTES_PER_RISCV_VECTOR].
3243 1. const_poly_int:P [BYTES_PER_RISCV_VECTOR * 9].
3244 Sequence:
3245 csrr a5, vlenb
3246 slli a4, a5, 3
3247 add a5, a4, a5
3248 2. const_poly_int:P [-BYTES_PER_RISCV_VECTOR * 9].
3249 Sequence:
3250 csrr a5, vlenb
3251 slli a4, a5, 3
3252 add a5, a4, a5
3253 neg a5, a5
3255 riscv_expand_op (ASHIFT, mode, dest, multiplicand,
3256 gen_int_mode (exact_log2 (multiplier_abs - 1),
3257 QImode));
3258 riscv_expand_op (PLUS, mode, dest, dest, multiplicand);
3259 if (neg_p)
3260 riscv_expand_op (NEG, mode, dest, dest, NULL_RTX);
3262 else
3264 /* We use multiplication for remaining cases. */
3265 gcc_assert (
3266 TARGET_MUL
3267 && "M-extension must be enabled to calculate the poly_int "
3268 "size/offset.");
3269 riscv_emit_move (dest, gen_int_mode (multiplier, mode));
3270 riscv_expand_op (MULT, mode, dest, dest, multiplicand);
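/* Illustrative examples of the strength reduction above: a multiplier
   of 8 becomes a single left shift by 3; 7 (= 2^3 - 1) becomes a shift
   by 3 followed by a subtract of the multiplicand; 9 (= 2^3 + 1)
   becomes a shift by 3 followed by an add; anything else falls back to
   an actual multiply, which requires the M extension.  */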
3275 /* Analyze SRC and emit a const_poly_int move sequence. */
3277 void
3278 riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
3280 poly_int64 value = rtx_to_poly_int64 (src);
3281 /* Use HOST_WIDE_INT instead of int, since a 32-bit type is not enough
3282 for e.g. (const_poly_int:DI [549755813888, 549755813888]). */
3283 HOST_WIDE_INT offset = value.coeffs[0];
3284 HOST_WIDE_INT factor = value.coeffs[1];
3285 int vlenb = BYTES_PER_RISCV_VECTOR.coeffs[1];
3286 int div_factor = 0;
3287 /* Calculate (const_poly_int:MODE [m, n]) using scalar instructions.
3288 For any (const_poly_int:MODE [m, n]), the calculation formula is as
3289 follows.
3290 constant = m - n.
3291 When minimum VLEN = 32, poly of VLENB = (4, 4).
3292 base = vlenb(4, 4) or vlenb/2(2, 2) or vlenb/4(1, 1).
3293 When minimum VLEN > 32, poly of VLENB = (8, 8).
3294 base = vlenb(8, 8) or vlenb/2(4, 4) or vlenb/4(2, 2) or vlenb/8(1, 1).
3295 magn = (n, n) / base.
3296 (m, n) = base * magn + constant.
3297 This calculation doesn't need div operation. */
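/* A worked example (illustrative): for (const_poly_int:DI [16, 16])
   with minimum VLEN > 32, VLENB is the poly value (8, 8), so constant
   = 16 - 16 = 0 and magn = 16 / 8 = 2.  The emitted sequence is
   roughly:
     csrr  tmp, vlenb
     slli  dest, tmp, 1
   with no trailing addition, since the constant part is zero.  */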
3299 if (known_le (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode)))
3300 emit_move_insn (tmp, gen_int_mode (BYTES_PER_RISCV_VECTOR, mode));
3301 else
3303 emit_move_insn (gen_highpart (Pmode, tmp), CONST0_RTX (Pmode));
3304 emit_move_insn (gen_lowpart (Pmode, tmp),
3305 gen_int_mode (BYTES_PER_RISCV_VECTOR, Pmode));
3308 if (BYTES_PER_RISCV_VECTOR.is_constant ())
3310 gcc_assert (value.is_constant ());
3311 riscv_emit_move (dest, GEN_INT (value.to_constant ()));
3312 return;
3314 else
3316 int max_power = exact_log2 (MAX_POLY_VARIANT);
3317 for (int i = 0; i <= max_power; i++)
3319 int possible_div_factor = 1 << i;
3320 if (factor % (vlenb / possible_div_factor) == 0)
3322 div_factor = possible_div_factor;
3323 break;
3326 gcc_assert (div_factor != 0);
3329 if (div_factor != 1)
3330 riscv_expand_op (LSHIFTRT, mode, tmp, tmp,
3331 gen_int_mode (exact_log2 (div_factor), QImode));
3333 riscv_expand_mult_with_const_int (mode, dest, tmp,
3334 factor / (vlenb / div_factor));
3335 HOST_WIDE_INT constant = offset - factor;
3337 if (constant == 0)
3338 return;
3339 else if (SMALL_OPERAND (constant))
3340 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
3341 else
3343 /* Handle a constant that is not a 12-bit value. */
3344 rtx high;
3346 /* Leave CONSTANT as a signed 12-bit value and put the excess in HIGH.
3347 The addition inside the macro CONST_HIGH_PART may cause an
3348 overflow, so we need to force a sign-extension check. */
3349 high = gen_int_mode (CONST_HIGH_PART (constant), mode);
3350 constant = CONST_LOW_PART (constant);
3351 riscv_emit_move (tmp, high);
3352 riscv_expand_op (PLUS, mode, dest, tmp, dest);
3353 riscv_expand_op (PLUS, mode, dest, dest, gen_int_mode (constant, mode));
3357 /* Take care of the subreg const_poly_int move below:
3359 1. (set (subreg:DI (reg:TI 237) 8)
3360 (subreg:DI (const_poly_int:TI [4, 2]) 8))
=>
3362 (set (subreg:DI (reg:TI 237) 8)
3363 (const_int 0)) */
3365 static bool
3366 riscv_legitimize_subreg_const_poly_move (machine_mode mode, rtx dest, rtx src)
3368 gcc_assert (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src)));
3369 gcc_assert (SUBREG_BYTE (src).is_constant ());
3371 int byte_offset = SUBREG_BYTE (src).to_constant ();
3372 rtx const_poly = SUBREG_REG (src);
3373 machine_mode subreg_mode = GET_MODE (const_poly);
3375 if (subreg_mode != TImode) /* Only TImode is needed for now. */
3376 return false;
3378 if (byte_offset == 8)
3380 /* The const_poly_int cannot exceed int64, just set zero here. */
3381 emit_move_insn (dest, CONST0_RTX (mode));
3382 return true;
3385 /* The transform below will be covered somewhere else,
3386 so ignore it here.
3387 (set (subreg:DI (reg:TI 237) 0)
3388 (subreg:DI (const_poly_int:TI [4, 2]) 0))
=>
3390 (set (subreg:DI (reg:TI 237) 0)
3391 (const_poly_int:DI [4, 2])) */
3393 return false;
3396 /* If (set DEST SRC) is not a valid move instruction, emit an equivalent
3397 sequence that is valid. */
3399 bool
3400 riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
3402 if (CONST_POLY_INT_P (src))
3405 Handle:
3406 (insn 183 182 184 6 (set (mem:QI (plus:DI (reg/f:DI 156)
3407 (const_int 96 [0x60])) [0 S1 A8])
3408 (const_poly_int:QI [8, 8]))
3409 "../../../../riscv-gcc/libgcc/unwind-dw2.c":1579:3 -1 (nil))
3411 if (MEM_P (dest))
3413 emit_move_insn (dest, force_reg (mode, src));
3414 return true;
3416 poly_int64 value = rtx_to_poly_int64 (src);
3417 if (!value.is_constant () && !TARGET_VECTOR)
3419 riscv_report_v_required ();
3420 return false;
3423 if (satisfies_constraint_vp (src) && GET_MODE (src) == Pmode)
3424 return false;
3426 if (GET_MODE_SIZE (mode).to_constant () < GET_MODE_SIZE (Pmode))
3428 /* In RV32 system, handle (const_poly_int:QI [m, n])
3429 (const_poly_int:HI [m, n]).
3430 In RV64 system, handle (const_poly_int:QI [m, n])
3431 (const_poly_int:HI [m, n])
3432 (const_poly_int:SI [m, n]). */
3433 rtx tmp = gen_reg_rtx (Pmode);
3434 rtx tmp2 = gen_reg_rtx (Pmode);
3435 riscv_legitimize_poly_move (Pmode, tmp2, tmp, src);
3436 emit_move_insn (dest, gen_lowpart (mode, tmp2));
3438 else
3440 /* In RV32 system, handle (const_poly_int:SI [m, n])
3441 (const_poly_int:DI [m, n]).
3442 In RV64 system, handle (const_poly_int:DI [m, n]).
3443 FIXME: Maybe we could gen SImode in RV32 and then sign-extend to
3444 DImode; the offset should not exceed 4GiB in general. */
3445 rtx tmp = gen_reg_rtx (mode);
3446 riscv_legitimize_poly_move (mode, dest, tmp, src);
3448 return true;
3451 if (SUBREG_P (src) && CONST_POLY_INT_P (SUBREG_REG (src))
3452 && riscv_legitimize_subreg_const_poly_move (mode, dest, src))
3453 return true;
3455 /* Expand
3456 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
3457 Expand this data movement instead of simply forbidding it, since
3458 we can improve the code generation for the following scenario
3459 by RVV auto-vectorization:
3460 (set (reg:V8QI 149) (vec_duplicate:V8QI (reg:QI))
3461 (set (reg:DI target) (subreg:DI (reg:V8QI reg) 0))
3462 Since RVV mode and scalar mode are in different REG_CLASS,
3463 we need to explicitly move data from V_REGS to GR_REGS by scalar move. */
3464 if (SUBREG_P (src) && riscv_v_ext_mode_p (GET_MODE (SUBREG_REG (src))))
3466 machine_mode vmode = GET_MODE (SUBREG_REG (src));
3467 unsigned int mode_size = GET_MODE_SIZE (mode).to_constant ();
3468 unsigned int vmode_size = GET_MODE_SIZE (vmode).to_constant ();
3469 /* We should be able to handle both partial and paradoxical subregs. */
3470 unsigned int nunits = vmode_size > mode_size ? vmode_size / mode_size : 1;
3471 scalar_mode smode = as_a<scalar_mode> (mode);
3472 unsigned int index = SUBREG_BYTE (src).to_constant () / mode_size;
3473 unsigned int num = known_eq (GET_MODE_SIZE (smode), 8)
3474 && !TARGET_VECTOR_ELEN_64 ? 2 : 1;
3475 bool need_int_reg_p = false;
3477 if (num == 2)
3479 /* If we want to extract 64bit value but ELEN < 64,
3480 we use RVV vector mode with EEW = 32 to extract
3481 the highpart and lowpart. */
3482 need_int_reg_p = smode == DFmode;
3483 smode = SImode;
3484 nunits = nunits * 2;
3487 if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
3489 rtx v = gen_lowpart (vmode, SUBREG_REG (src));
3490 rtx int_reg = dest;
3492 if (need_int_reg_p)
3494 int_reg = gen_reg_rtx (DImode);
3495 emit_move_insn (int_reg, gen_lowpart (GET_MODE (int_reg), dest));
3498 for (unsigned int i = 0; i < num; i++)
3500 rtx result;
3501 if (num == 1)
3502 result = int_reg;
3503 else if (i == 0)
3504 result = gen_lowpart (smode, int_reg);
3505 else
3506 result = gen_reg_rtx (smode);
3508 riscv_vector::emit_vec_extract (result, v,
3509 gen_int_mode (index + i, Pmode));
3511 if (i == 1)
3513 if (UNITS_PER_WORD < mode_size)
3514 /* If Pmode = SImode and mode = DImode, we just need to
3515 extract element of index = 1 from the vector and move it
3516 into the highpart of the DEST since DEST consists of 2
3517 scalar registers. */
3518 emit_move_insn (gen_highpart (smode, int_reg), result);
3519 else
3521 rtx tmp = expand_binop (Pmode, ashl_optab,
3522 gen_lowpart (Pmode, result),
3523 gen_int_mode (32, Pmode),
3524 NULL_RTX, 0, OPTAB_DIRECT);
3525 rtx tmp2 = expand_binop (Pmode, ior_optab, tmp, int_reg,
3526 NULL_RTX, 0, OPTAB_DIRECT);
3527 emit_move_insn (int_reg, tmp2);
3532 if (need_int_reg_p)
3533 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), int_reg));
3534 else
3535 emit_move_insn (dest, int_reg);
3537 else
3538 gcc_unreachable ();
3540 return true;
3542 /* Expand
3543 (set (reg:QI target) (mem:QI (address)))
to
3545 (set (reg:DI temp) (zero_extend:DI (mem:QI (address))))
3546 (set (reg:QI target) (subreg:QI (reg:DI temp) 0))
3547 with auto-sign/zero extend. */
3548 if (GET_MODE_CLASS (mode) == MODE_INT
3549 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD
3550 && can_create_pseudo_p ()
3551 && MEM_P (src))
3553 rtx temp_reg;
3554 int zero_extend_p;
3556 temp_reg = gen_reg_rtx (word_mode);
3557 zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND);
3558 emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode,
3559 zero_extend_p));
3560 riscv_emit_move (dest, gen_lowpart (mode, temp_reg));
3561 return true;
3564 if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
3566 rtx reg;
3568 if (GET_CODE (src) == CONST_INT)
3570 /* Apply the equivalent of PROMOTE_MODE here for constants to
3571 improve cse. */
3572 machine_mode promoted_mode = mode;
3573 if (GET_MODE_CLASS (mode) == MODE_INT
3574 && GET_MODE_SIZE (mode).to_constant () < UNITS_PER_WORD)
3575 promoted_mode = word_mode;
3577 if (splittable_const_int_operand (src, mode))
3579 reg = gen_reg_rtx (promoted_mode);
3580 riscv_move_integer (reg, reg, INTVAL (src), mode);
3582 else
3583 reg = force_reg (promoted_mode, src);
3585 if (promoted_mode != mode)
3586 reg = gen_lowpart (mode, reg);
3588 else
3589 reg = force_reg (mode, src);
3590 riscv_emit_move (dest, reg);
3591 return true;
3594 /* In order to fit NaN boxing, expand
3595 (set FP_REG (reg:HF/BF src))
to
3597 (set (reg:SI/DI mask) (const_int -65536)
3598 (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF/BF src) 0)))
3599 (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
3600 (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ] UNSPEC_FMV_FP16_X))
3603 if (TARGET_HARD_FLOAT
3604 && !TARGET_ZFHMIN
3605 && (mode == HFmode || mode == BFmode)
3606 && REG_P (dest) && FP_REG_P (REGNO (dest))
3607 && REG_P (src) && !FP_REG_P (REGNO (src))
3608 && can_create_pseudo_p ())
3610 rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
3611 rtx temp = gen_reg_rtx (word_mode);
3612 emit_insn (gen_extend_insn (temp,
3613 simplify_gen_subreg (HImode, src, mode, 0),
3614 word_mode, HImode, 1));
3615 if (word_mode == SImode)
3616 emit_insn (gen_iorsi3 (temp, mask, temp));
3617 else
3618 emit_insn (gen_iordi3 (temp, mask, temp));
3620 riscv_emit_move (dest, gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
3621 UNSPEC_FMV_FP16_X));
3623 return true;
3626 /* We need to deal with constants that would be legitimate
3627 immediate_operands but aren't legitimate move_operands. */
3628 if (CONSTANT_P (src) && !move_operand (src, mode))
3630 riscv_legitimize_const_move (mode, dest, src);
3631 set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
3632 return true;
3635 /* RISC-V GCC may generate a non-legitimate address, because we provide
3636 patterns to optimize access to PIC local symbols, and that can make
3637 GCC generate unrecognizable instructions while optimizing. */
3639 if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
3640 reload_completed))
3642 XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
3645 if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
3646 reload_completed))
3648 XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
3651 return false;
3654 /* Return true if there is an instruction that implements CODE and accepts
3655 X as an immediate operand. */
3657 static int
3658 riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
3660 switch (code)
3662 case ASHIFT:
3663 case ASHIFTRT:
3664 case LSHIFTRT:
3665 /* All shift counts are truncated to a valid constant. */
3666 return true;
3668 case AND:
3669 case IOR:
3670 case XOR:
3671 case PLUS:
3672 case LT:
3673 case LTU:
3674 /* These instructions take 12-bit signed immediates. */
3675 return SMALL_OPERAND (x);
3677 case LE:
3678 /* We add 1 to the immediate and use SLT. */
3679 return SMALL_OPERAND (x + 1);
3681 case LEU:
3682 /* Likewise SLTU, but reject the always-true case. */
3683 return SMALL_OPERAND (x + 1) && x + 1 != 0;
3685 case GE:
3686 case GEU:
3687 /* We can emulate an immediate of 1 by using GT/GTU against x0. */
3688 return x == 1;
3690 default:
3691 /* By default assume that x0 can be used for 0. */
3692 return x == 0;
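/* Illustrative examples of the rewrites above: "r <= 7" becomes SLTI
   with immediate 8 (LE adds 1 and uses SLT); LEU similarly uses SLTIU
   with x + 1, but rejects x + 1 == 0, whose comparison would be always
   true; "r >= 1" is the one immediate GE/GEU case, emulated as GT/GTU
   against x0.  */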
3696 /* Return the cost of binary operation X, given that the instruction
3697 sequence for a word-sized or smaller operation takes SINGLE_INSNS
3698 instructions and that the sequence for a double-word operation takes
3699 DOUBLE_INSNS instructions. */
3701 static int
3702 riscv_binary_cost (rtx x, int single_insns, int double_insns)
3704 if (!riscv_v_ext_mode_p (GET_MODE (x))
3705 && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
3706 return COSTS_N_INSNS (double_insns);
3707 return COSTS_N_INSNS (single_insns);
3710 /* Return the cost of sign- or zero-extending OP. */
3712 static int
3713 riscv_extend_cost (rtx op, bool unsigned_p)
3715 if (MEM_P (op))
3716 return 0;
3718 if (unsigned_p && GET_MODE (op) == QImode)
3719 /* We can use ANDI. */
3720 return COSTS_N_INSNS (1);
3722    /* ZBA provides zext.w.  */
3723 if (TARGET_ZBA && TARGET_64BIT && unsigned_p && GET_MODE (op) == SImode)
3724 return COSTS_N_INSNS (1);
3726    /* ZBB provides zext.h, sext.b and sext.h.  */
3727 if (TARGET_ZBB)
3729 if (!unsigned_p && GET_MODE (op) == QImode)
3730 return COSTS_N_INSNS (1);
3732 if (GET_MODE (op) == HImode)
3733 return COSTS_N_INSNS (1);
3736 if (!unsigned_p && GET_MODE (op) == SImode)
3737 /* We can use SEXT.W. */
3738 return COSTS_N_INSNS (1);
3740 /* We need to use a shift left and a shift right. */
3741 return COSTS_N_INSNS (2);
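/* For instance, without ZBB a QImode sign-extension on rv64 uses the
   two-shift idiom the cost above reflects, roughly:
     slli a0, a0, 56	# move bit 7 of the byte up to bit 63
     srai a0, a0, 56	# arithmetic shift back, replicating the sign  */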
3744 /* Implement TARGET_RTX_COSTS. */
3746 #define SINGLE_SHIFT_COST 1
3748 static bool
3749 riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
3750 int *total, bool speed)
3752 /* TODO: We set RVV instruction cost as 1 by default.
3753 Cost Model need to be well analyzed and supported in the future. */
3754 if (riscv_v_ext_mode_p (mode))
3756 *total = COSTS_N_INSNS (1);
3757 return true;
3760 bool float_mode_p = FLOAT_MODE_P (mode);
3761 int cost;
3763 switch (GET_CODE (x))
3765 case SET:
3766    /* If we are called for an INSN that's a simple set of a register,
3767       then base the cost on the SET_SRC alone.  */
3768 if (outer_code == INSN
3769 && register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
3771 if (REG_P (SET_SRC (x)) && TARGET_DOUBLE_FLOAT && mode == DFmode)
3773 *total = COSTS_N_INSNS (1);
3774 return true;
3776 riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed);
3777 return true;
3780 /* Otherwise return FALSE indicating we should recurse into both the
3781 SET_DEST and SET_SRC combining the cost of both. */
3782 return false;
3784 case CONST_INT:
3785    /* Trivial constants are checked using OUTER_CODE in case they are
3786       encodable in the insn itself without the need for additional insn(s).  */
3787 if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
3789 *total = 0;
3790 return true;
3792 /* Fall through. */
3794 case SYMBOL_REF:
3795 case LABEL_REF:
3796 case CONST_DOUBLE:
3797    /* With TARGET_SUPPORTS_WIDE_INT, a const int can't live in a CONST_DOUBLE
3798       rtl object.  This recheck is needed because of the switch-case fall through above.  */
3799 if (GET_CODE (x) == CONST_DOUBLE)
3800 gcc_assert (GET_MODE (x) != VOIDmode);
3801 /* Fall through. */
3803 case CONST:
3804    /* Non-trivial CONST_INT fall-through: check whether multiple insns are needed.  */
3805 if ((cost = riscv_const_insns (x, can_create_pseudo_p ())) > 0)
3807    /* 1. Hoist will GCSE constants only if TOTAL returned is non-zero.
3808       2. For constants loaded more than once, the approach so far has
3809          been to duplicate the operation rather than to CSE the constant.
3810       3. TODO: make the cost more accurate, especially if riscv_const_insns
3811          returns > 1.  */
3812 if (outer_code == SET || GET_MODE (x) == VOIDmode)
3813 *total = COSTS_N_INSNS (1);
3815 else /* The instruction will be fetched from the constant pool. */
3816 *total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
3817 return true;
3819 case MEM:
3820 /* If the address is legitimate, return the number of
3821 instructions it needs. */
3822 if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
3824 /* When optimizing for size, make uncompressible 32-bit addresses
3825 more expensive so that compressible 32-bit addresses are
3826 preferred. */
3827 if ((TARGET_RVC || TARGET_ZCA)
3828 && !speed && riscv_mshorten_memrefs && mode == SImode
3829 && !riscv_compressed_lw_address_p (XEXP (x, 0)))
3830 cost++;
3832 *total = COSTS_N_INSNS (cost + tune_param->memory_cost);
3833 return true;
3835 /* Otherwise use the default handling. */
3836 return false;
3838 case IF_THEN_ELSE:
3839 if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
3840 && reg_or_0_operand (XEXP (x, 1), mode)
3841 && sfb_alu_operand (XEXP (x, 2), mode)
3842 && comparison_operator (XEXP (x, 0), VOIDmode))
3844 /* For predicated conditional-move operations we assume the cost
3845 of a single instruction even though there are actually two. */
3846 *total = COSTS_N_INSNS (1);
3847 return true;
3849 else if (TARGET_ZICOND_LIKE
3850 && outer_code == SET
3851 && ((GET_CODE (XEXP (x, 1)) == REG
3852 && XEXP (x, 2) == CONST0_RTX (GET_MODE (XEXP (x, 1))))
3853 || (GET_CODE (XEXP (x, 2)) == REG
3854 && XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 2))))
3855 || (COMPARISON_P (XEXP (x, 0))
3856 && GET_CODE (XEXP (x, 1)) == REG
3857 && rtx_equal_p (XEXP (x, 1), XEXP (XEXP (x, 0), 0)))
3858 || (COMPARISON_P (XEXP (x, 0))
3859 && GET_CODE (XEXP (x, 1)) == REG
3860 && rtx_equal_p (XEXP (x, 2), XEXP (XEXP (x, 0), 0)))))
3862 *total = COSTS_N_INSNS (1);
3863 return true;
3865 else if (LABEL_REF_P (XEXP (x, 1)) && XEXP (x, 2) == pc_rtx)
3867 if (equality_operator (XEXP (x, 0), mode)
3868 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTRACT)
3870 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST + 1);
3871 return true;
3873 if (ordered_comparison_operator (XEXP (x, 0), mode))
3875 *total = COSTS_N_INSNS (1);
3876 return true;
3879 return false;
3881 case NOT:
3882 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 2 : 1);
3883 return false;
3885 case AND:
3886 /* slli.uw pattern for zba. */
3887 if (TARGET_ZBA && TARGET_64BIT && mode == DImode
3888 && GET_CODE (XEXP (x, 0)) == ASHIFT)
3890 rtx and_rhs = XEXP (x, 1);
3891 rtx ashift_lhs = XEXP (XEXP (x, 0), 0);
3892 rtx ashift_rhs = XEXP (XEXP (x, 0), 1);
3893 if (register_operand (ashift_lhs, GET_MODE (ashift_lhs))
3894 && CONST_INT_P (ashift_rhs)
3895 && CONST_INT_P (and_rhs)
3896 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
3897 *total = COSTS_N_INSNS (1);
3898 return true;
3900 /* bclri pattern for zbs. */
3901 if (TARGET_ZBS
3902 && not_single_bit_mask_operand (XEXP (x, 1), VOIDmode))
3904 *total = COSTS_N_INSNS (1);
3905 return true;
3907 /* bclr pattern for zbs. */
3908 if (TARGET_ZBS
3909 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1)))
3910 && GET_CODE (XEXP (x, 0)) == ROTATE
3911 && CONST_INT_P (XEXP ((XEXP (x, 0)), 0))
3912 && INTVAL (XEXP ((XEXP (x, 0)), 0)) == -2)
3914 *total = COSTS_N_INSNS (1);
3915 return true;
3918 gcc_fallthrough ();
3919 case IOR:
3920 case XOR:
3921 /* orn, andn and xorn pattern for zbb. */
3922 if (TARGET_ZBB
3923 && GET_CODE (XEXP (x, 0)) == NOT)
3925 *total = riscv_binary_cost (x, 1, 2);
3926 return true;
3929 /* bset[i] and binv[i] pattern for zbs. */
3930 if ((GET_CODE (x) == IOR || GET_CODE (x) == XOR)
3931 && TARGET_ZBS
3932 && ((GET_CODE (XEXP (x, 0)) == ASHIFT
3933 && CONST_INT_P (XEXP (XEXP (x, 0), 0)))
3934 || single_bit_mask_operand (XEXP (x, 1), VOIDmode)))
3936 *total = COSTS_N_INSNS (1);
3937 return true;
3940 /* Double-word operations use two single-word operations. */
3941 *total = riscv_binary_cost (x, 1, 2);
3942 return false;
3944 case ZERO_EXTRACT:
3945 /* This is an SImode shift. */
3946 if (outer_code == SET
3947 && CONST_INT_P (XEXP (x, 1))
3948 && CONST_INT_P (XEXP (x, 2))
3949 && (INTVAL (XEXP (x, 2)) > 0)
3950 && (INTVAL (XEXP (x, 1)) + INTVAL (XEXP (x, 2)) == 32))
3952 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3953 return true;
3955 /* bit extraction pattern (zbs:bext, xtheadbs:tst). */
3956 if ((TARGET_ZBS || TARGET_XTHEADBS) && outer_code == SET
3957 && GET_CODE (XEXP (x, 1)) == CONST_INT
3958 && INTVAL (XEXP (x, 1)) == 1)
3960 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3961 return true;
3963 gcc_fallthrough ();
3964 case SIGN_EXTRACT:
3965 if (TARGET_XTHEADBB && outer_code == SET
3966 && CONST_INT_P (XEXP (x, 1))
3967 && CONST_INT_P (XEXP (x, 2)))
3969 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
3970 return true;
3972 return false;
3974 case ASHIFT:
3975 /* bset pattern for zbs. */
3976 if (TARGET_ZBS
3977 && CONST_INT_P (XEXP (x, 0))
3978 && INTVAL (XEXP (x, 0)) == 1)
3980 *total = COSTS_N_INSNS (1);
3981 return true;
3983 gcc_fallthrough ();
3984 case ASHIFTRT:
3985 case LSHIFTRT:
3986 *total = riscv_binary_cost (x, SINGLE_SHIFT_COST,
3987 CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
3988 return false;
3990 case ABS:
3991 *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
3992 return false;
3994 case LO_SUM:
3995 *total = set_src_cost (XEXP (x, 0), mode, speed);
3996 return true;
3998 case LT:
3999 /* This is an SImode shift. */
4000 if (outer_code == SET && GET_MODE (x) == DImode
4001 && GET_MODE (XEXP (x, 0)) == SImode)
4003 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
4004 return true;
4006 /* Fall through. */
4007 case LTU:
4008 case LE:
4009 case LEU:
4010 case GT:
4011 case GTU:
4012 case GE:
4013 case GEU:
4014 case EQ:
4015 case NE:
4016 /* Branch comparisons have VOIDmode, so use the first operand's
4017 mode instead. */
4018 mode = GET_MODE (XEXP (x, 0));
4019 if (float_mode_p)
4020 *total = tune_param->fp_add[mode == DFmode];
4021 else
4022 *total = riscv_binary_cost (x, 1, 3);
4023 return false;
4025 case UNORDERED:
4026 case ORDERED:
4027 /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */
4028 mode = GET_MODE (XEXP (x, 0));
4029 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
4030 return false;
4032 case UNEQ:
4033 /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */
4034 mode = GET_MODE (XEXP (x, 0));
4035 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
4036 return false;
4038 case LTGT:
4039    /* (FLT(A, B) || FGT(A, B)).  */
4040 mode = GET_MODE (XEXP (x, 0));
4041 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
4042 return false;
4044 case UNGE:
4045 case UNGT:
4046 case UNLE:
4047 case UNLT:
4048 /* FLT or FLE, but guarded by an FFLAGS read and write. */
4049 mode = GET_MODE (XEXP (x, 0));
4050 *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
4051 return false;
4053 case MINUS:
4054 if (float_mode_p)
4055 *total = tune_param->fp_add[mode == DFmode];
4056 else
4057 *total = riscv_binary_cost (x, 1, 4);
4058 return false;
4060 case PLUS:
4061 /* add.uw pattern for zba. */
4062 if (TARGET_ZBA
4063 && (TARGET_64BIT && (mode == DImode))
4064 && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4065 && register_operand (XEXP (XEXP (x, 0), 0),
4066 GET_MODE (XEXP (XEXP (x, 0), 0)))
4067 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
4069 *total = COSTS_N_INSNS (1);
4070 return true;
4072 /* shNadd pattern for zba. */
4073 if (TARGET_ZBA
4074 && ((!TARGET_64BIT && (mode == SImode)) ||
4075 (TARGET_64BIT && (mode == DImode)))
4076 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
4077 && register_operand (XEXP (XEXP (x, 0), 0),
4078 GET_MODE (XEXP (XEXP (x, 0), 0)))
4079 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4080 && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
4082 *total = COSTS_N_INSNS (1);
4083 return true;
4085 /* Before strength-reduction, the shNadd can be expressed as the addition
4086 of a multiplication with a power-of-two. If this case is not handled,
4087 the strength-reduction in expmed.c will calculate an inflated cost. */
4088 if (TARGET_ZBA
4089 && mode == word_mode
4090 && GET_CODE (XEXP (x, 0)) == MULT
4091 && register_operand (XEXP (XEXP (x, 0), 0),
4092 GET_MODE (XEXP (XEXP (x, 0), 0)))
4093 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4094 && pow2p_hwi (INTVAL (XEXP (XEXP (x, 0), 1)))
4095 && IN_RANGE (exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1))), 1, 3))
4097 *total = COSTS_N_INSNS (1);
4098 return true;
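	/* For example, with Zba the expression b + (a << 2) -- equivalently
	   b + a * 4 -- maps to a single sh2add instruction, which is why the
	   forms above are all costed at one insn.  */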
4100 /* shNadd.uw pattern for zba.
4101 [(set (match_operand:DI 0 "register_operand" "=r")
4102 (plus:DI
4103 (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
4104 (match_operand:QI 2 "immediate_operand" "I"))
4105 (match_operand 3 "immediate_operand" ""))
4106 (match_operand:DI 4 "register_operand" "r")))]
4107 "TARGET_64BIT && TARGET_ZBA
4108 && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
4109 && (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0xffffffff"
4111 if (TARGET_ZBA
4112 && (TARGET_64BIT && (mode == DImode))
4113 && (GET_CODE (XEXP (x, 0)) == AND)
4114 && register_operand (XEXP (x, 1), GET_MODE (XEXP (x, 1))))
4116 do {
4117 rtx and_lhs = XEXP (XEXP (x, 0), 0);
4118 rtx and_rhs = XEXP (XEXP (x, 0), 1);
4119 if (GET_CODE (and_lhs) != ASHIFT)
4120 break;
4121 if (!CONST_INT_P (and_rhs))
4122 break;
4124 rtx ashift_rhs = XEXP (and_lhs, 1);
4126 if (!CONST_INT_P (ashift_rhs)
4127 || !IN_RANGE (INTVAL (ashift_rhs), 1, 3))
4128 break;
4130 if (CONST_INT_P (and_rhs)
4131 && ((INTVAL (and_rhs) >> INTVAL (ashift_rhs)) == 0xffffffff))
4133 *total = COSTS_N_INSNS (1);
4134 return true;
4136 } while (false);
4139 if (float_mode_p)
4140 *total = tune_param->fp_add[mode == DFmode];
4141 else
4142 *total = riscv_binary_cost (x, 1, 4);
4143 return false;
4145 case NEG:
4147 rtx op = XEXP (x, 0);
4148 if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
4150 *total = (tune_param->fp_mul[mode == DFmode]
4151 + set_src_cost (XEXP (op, 0), mode, speed)
4152 + set_src_cost (XEXP (op, 1), mode, speed)
4153 + set_src_cost (XEXP (op, 2), mode, speed));
4154 return true;
4158 if (float_mode_p)
4159 *total = tune_param->fp_add[mode == DFmode];
4160 else
4161 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD ? 4 : 1);
4162 return false;
4164 case MULT:
4165 if (float_mode_p)
4166 *total = tune_param->fp_mul[mode == DFmode];
4167 else if (!(TARGET_MUL || TARGET_ZMMUL))
4168 /* Estimate the cost of a library call. */
4169 *total = COSTS_N_INSNS (speed ? 32 : 6);
4170 else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
4171 *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2);
4172 else if (!speed)
4173 *total = COSTS_N_INSNS (1);
4174 else
4175 *total = tune_param->int_mul[mode == DImode];
4176 return false;
4178 case DIV:
4179 case SQRT:
4180 case MOD:
4181 if (float_mode_p)
4183 *total = tune_param->fp_div[mode == DFmode];
4184 return false;
4186 /* Fall through. */
4188 case UDIV:
4189 case UMOD:
4190 if (!TARGET_DIV)
4191 /* Estimate the cost of a library call. */
4192 *total = COSTS_N_INSNS (speed ? 32 : 6);
4193 else if (speed)
4194 *total = tune_param->int_div[mode == DImode];
4195 else
4196 *total = COSTS_N_INSNS (1);
4197 return false;
4199 case ZERO_EXTEND:
4200 /* This is an SImode shift. */
4201 if (GET_CODE (XEXP (x, 0)) == LSHIFTRT)
4203 *total = COSTS_N_INSNS (SINGLE_SHIFT_COST);
4204 return true;
4206 /* Fall through. */
4207 case SIGN_EXTEND:
4208 *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
4209 return false;
4211 case BSWAP:
4212 if (TARGET_ZBB)
4214 /* RISC-V only defines rev8 for XLEN, so we will need an extra
4215 shift-right instruction for smaller modes. */
4216 *total = COSTS_N_INSNS (mode == word_mode ? 1 : 2);
4217 return true;
4219 return false;
4221 case FLOAT:
4222 case UNSIGNED_FLOAT:
4223 case FIX:
4224 case FLOAT_EXTEND:
4225 case FLOAT_TRUNCATE:
4226 *total = tune_param->fp_add[mode == DFmode];
4227 return false;
4229 case FMA:
4230 *total = (tune_param->fp_mul[mode == DFmode]
4231 + set_src_cost (XEXP (x, 0), mode, speed)
4232 + set_src_cost (XEXP (x, 1), mode, speed)
4233 + set_src_cost (XEXP (x, 2), mode, speed));
4234 return true;
4236 case UNSPEC:
4237 if (XINT (x, 1) == UNSPEC_AUIPC)
4239 /* Make AUIPC cheap to avoid spilling its result to the stack. */
4240 *total = 1;
4241 return true;
4243 return false;
4245 default:
4246 return false;
4250 /* Implement TARGET_ADDRESS_COST. */
4252 static int
4253 riscv_address_cost (rtx addr, machine_mode mode,
4254 addr_space_t as ATTRIBUTE_UNUSED,
4255 bool speed ATTRIBUTE_UNUSED)
4257 /* When optimizing for size, make uncompressible 32-bit addresses more
4258 * expensive so that compressible 32-bit addresses are preferred. */
4259 if ((TARGET_RVC || TARGET_ZCA)
4260 && !speed && riscv_mshorten_memrefs && mode == SImode
4261 && !riscv_compressed_lw_address_p (addr))
4262 return riscv_address_insns (addr, mode, false) + 1;
4263 return riscv_address_insns (addr, mode, false);
4266 /* Implement TARGET_INSN_COST. We factor in the branch cost in the cost
4267 calculation for conditional branches: one unit is considered the cost
4268 of microarchitecture-dependent actual branch execution and therefore
4269 multiplied by BRANCH_COST and any remaining units are considered fixed
4270 branch overhead. Branches on a floating-point condition incur an extra
4271 instruction cost as they will be split into an FCMP operation followed
4272 by a branch on an integer condition. */
4274 static int
4275 riscv_insn_cost (rtx_insn *insn, bool speed)
4277 rtx x = PATTERN (insn);
4278 int cost = pattern_cost (x, speed);
4280 if (JUMP_P (insn))
4282 if (GET_CODE (x) == PARALLEL)
4283 x = XVECEXP (x, 0, 0);
4284 if (GET_CODE (x) == SET
4285 && GET_CODE (SET_DEST (x)) == PC
4286 && GET_CODE (SET_SRC (x)) == IF_THEN_ELSE)
4288 cost += COSTS_N_INSNS (BRANCH_COST (speed, false) - 1);
4289 if (FLOAT_MODE_P (GET_MODE (XEXP (XEXP (SET_SRC (x), 0), 0))))
4290 cost += COSTS_N_INSNS (1);
4293 return cost;
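/* For example, with BRANCH_COST (speed, false) == 3, an integer conditional
   branch costs its pattern cost plus two extra insn units, and a branch on
   a floating-point condition adds one more unit for the FCMP it is split
   into.  */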
4296 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
4297 but we consider cost units of branch instructions equal to cost units of
4298 other instructions. */
4300 static unsigned int
4301 riscv_max_noce_ifcvt_seq_cost (edge e)
4303 bool predictable_p = predictable_edge_p (e);
4305 if (predictable_p)
4307 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
4308 return param_max_rtl_if_conversion_predictable_cost;
4310 else
4312 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
4313 return param_max_rtl_if_conversion_unpredictable_cost;
4316 return COSTS_N_INSNS (BRANCH_COST (true, predictable_p));
4319 /* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P. We replace the cost of a
4320 conditional branch assumed by `noce_find_if_block' at `COSTS_N_INSNS (2)'
4321 by our actual conditional branch cost, observing that our branches test
4322 conditions directly, so there is no preparatory extra condition-set
4323 instruction. */
4325 static bool
4326 riscv_noce_conversion_profitable_p (rtx_insn *seq,
4327 struct noce_if_info *if_info)
4329 struct noce_if_info riscv_if_info = *if_info;
4331 riscv_if_info.original_cost -= COSTS_N_INSNS (2);
4332 riscv_if_info.original_cost += insn_cost (if_info->jump, if_info->speed_p);
4334 /* Hack alert! When `noce_try_store_flag_mask' uses `cstore<mode>4'
4335 to emit a conditional set operation on DImode output it comes up
4336 with a sequence such as:
4338 (insn 26 0 27 (set (reg:SI 140)
4339 (eq:SI (reg/v:DI 137 [ c ])
4340 (const_int 0 [0]))) 302 {*seq_zero_disi}
4341 (nil))
4342 (insn 27 26 28 (set (reg:DI 139)
4343 (zero_extend:DI (reg:SI 140))) 116 {*zero_extendsidi2_internal}
4344 (nil))
4346 because our `cstore<mode>4' pattern expands to an insn that gives
4347 a SImode output. The output of conditional set is 0 or 1 boolean,
4348 so it is valid for input in any scalar integer mode and therefore
4349 combine later folds the zero extend operation into an equivalent
4350 conditional set operation that produces a DImode output, however
4351 this redundant zero extend operation counts towards the cost of
4352 the replacement sequence. Compensate for that by incrementing the
4353 cost of the original sequence as well as the maximum sequence cost
4354 accordingly. Likewise for sign extension. */
4355 rtx last_dest = NULL_RTX;
4356 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
4358 if (!NONDEBUG_INSN_P (insn))
4359 continue;
4361 rtx x = PATTERN (insn);
4362 if (NONJUMP_INSN_P (insn)
4363 && GET_CODE (x) == SET)
4365 rtx src = SET_SRC (x);
4366 enum rtx_code code = GET_CODE (src);
4367 if (last_dest != NULL_RTX
4368 && (code == SIGN_EXTEND || code == ZERO_EXTEND)
4369 && REG_P (XEXP (src, 0))
4370 && REGNO (XEXP (src, 0)) == REGNO (last_dest))
4372 riscv_if_info.original_cost += COSTS_N_INSNS (1);
4373 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
4376 rtx dest = SET_DEST (x);
4378 /* Do something similar for the moves that are likely to
4379 turn into NOP moves by the time the register allocator is
4380 done. These are also side effects of how our sCC expanders
4381 work. We'll want to check and update LAST_DEST here too. */
4382 if (last_dest
4383 && REG_P (dest)
4384 && GET_MODE (dest) == SImode
4385 && SUBREG_P (src)
4386 && SUBREG_PROMOTED_VAR_P (src)
4387 && REGNO (SUBREG_REG (src)) == REGNO (last_dest))
4389 riscv_if_info.original_cost += COSTS_N_INSNS (1);
4390 riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
4391 if (last_dest)
4392 last_dest = dest;
4394 else
4395 last_dest = NULL_RTX;
4397 if (COMPARISON_P (src) && REG_P (dest))
4398 last_dest = dest;
4400 else
4401 last_dest = NULL_RTX;
4404 return default_noce_conversion_profitable_p (seq, &riscv_if_info);
4407 /* Return one word of double-word value OP. HIGH_P is true to select the
4408 high part or false to select the low part. */
4410 rtx
4411 riscv_subword (rtx op, bool high_p)
4413 unsigned int byte = (high_p != BYTES_BIG_ENDIAN) ? UNITS_PER_WORD : 0;
4414 machine_mode mode = GET_MODE (op);
4416 if (mode == VOIDmode)
4417 mode = TARGET_64BIT ? TImode : DImode;
4419 if (MEM_P (op))
4420 return adjust_address (op, word_mode, byte);
4422 if (REG_P (op))
4423 gcc_assert (!FP_REG_RTX_P (op));
4425 return simplify_gen_subreg (word_mode, op, mode, byte);
4428 /* Return true if a 64-bit move from SRC to DEST should be split into two. */
4430 bool
4431 riscv_split_64bit_move_p (rtx dest, rtx src)
4433 if (TARGET_64BIT)
4434 return false;
4436 /* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
4437 if (satisfies_constraint_zfli (src))
4438 return false;
4440 /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
4441 of zeroing an FPR with FCVT.D.W. */
4442 if (TARGET_DOUBLE_FLOAT
4443 && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
4444 || (FP_REG_RTX_P (dest) && MEM_P (src))
4445 || (FP_REG_RTX_P (src) && MEM_P (dest))
4446 || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
4447 return false;
4449 return true;
4452 /* Split a doubleword move from SRC to DEST. On 32-bit targets,
4453 this function handles 64-bit moves for which riscv_split_64bit_move_p
4454 holds. For 64-bit targets, this function handles 128-bit moves. */
4456 void
4457 riscv_split_doubleword_move (rtx dest, rtx src)
4459 /* ZFA or XTheadFmv has instructions for accessing the upper bits of a double. */
4460 if (!TARGET_64BIT && (TARGET_ZFA || TARGET_XTHEADFMV))
4462 if (FP_REG_RTX_P (dest))
4464 rtx low_src = riscv_subword (src, false);
4465 rtx high_src = riscv_subword (src, true);
4467 if (TARGET_ZFA)
4468 emit_insn (gen_movdfsisi3_rv32 (dest, high_src, low_src));
4469 else
4470 emit_insn (gen_th_fmv_hw_w_x (dest, high_src, low_src));
4471 return;
4473 if (FP_REG_RTX_P (src))
4475 rtx low_dest = riscv_subword (dest, false);
4476 rtx high_dest = riscv_subword (dest, true);
4478 if (TARGET_ZFA)
4480 emit_insn (gen_movsidf2_low_rv32 (low_dest, src));
4481 emit_insn (gen_movsidf2_high_rv32 (high_dest, src));
4482 return;
4484 else
4486 emit_insn (gen_th_fmv_x_w (low_dest, src));
4487 emit_insn (gen_th_fmv_x_hw (high_dest, src));
4489 return;
4493 /* The operation can be split into two normal moves. Decide in
4494 which order to do them. */
4495 rtx low_dest = riscv_subword (dest, false);
4496 if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
4498 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
4499 riscv_emit_move (low_dest, riscv_subword (src, false));
4501 else
4503 riscv_emit_move (low_dest, riscv_subword (src, false));
4504 riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
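  /* For example, if the low word of DEST is itself one of the registers
     making up SRC, doing the high-word move first (as above) keeps the
     still-needed half of SRC from being clobbered before it is read.  */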
4508 /* Constant VAL is known to be the sum of two S12 constants.  Break it into
4509    its comprising BASE and OFF.
4510    Numerically S12 is -2048 to 2047; however, we use the more conservative
4511    range -2048 to 2032, as the offsets pertain to stack-related registers.  */
4513 void
4514 riscv_split_sum_of_two_s12 (HOST_WIDE_INT val, HOST_WIDE_INT *base,
4515 HOST_WIDE_INT *off)
4517 if (SUM_OF_TWO_S12_N (val))
4519 *base = -2048;
4520 *off = val - (-2048);
4522 else if (SUM_OF_TWO_S12_P_ALGN (val))
4524 *base = 2032;
4525 *off = val - 2032;
4527 else
4529 gcc_unreachable ();
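  /* For example, VAL == 4000 splits as BASE == 2032, OFF == 1968, and
     VAL == -3000 as BASE == -2048, OFF == -952; each half fits the 12-bit
     signed immediate of a single addi.  */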
4534 /* Return the appropriate instructions to move SRC into DEST. Assume
4535 that SRC is operand 1 and DEST is operand 0. */
4537 const char *
4538 riscv_output_move (rtx dest, rtx src)
4540 enum rtx_code dest_code, src_code;
4541 machine_mode mode;
4542 bool dbl_p;
4543 unsigned width;
4544 const char *insn;
4546 if ((insn = th_output_move (dest, src)))
4547 return insn;
4549 dest_code = GET_CODE (dest);
4550 src_code = GET_CODE (src);
4551 mode = GET_MODE (dest);
4552 dbl_p = (GET_MODE_SIZE (mode).to_constant () == 8);
4553 width = GET_MODE_SIZE (mode).to_constant ();
4555 if (dbl_p && riscv_split_64bit_move_p (dest, src))
4556 return "#";
4558 if (dest_code == REG && GP_REG_P (REGNO (dest)))
4560 if (src_code == REG && FP_REG_P (REGNO (src)))
4561 switch (width)
4563 case 2:
4564 if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
4565 return "fmv.x.h\t%0,%1";
4566 /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */
4567 return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16";
4568 case 4:
4569 return "fmv.x.s\t%0,%1";
4570 case 8:
4571 return "fmv.x.d\t%0,%1";
4574 if (src_code == MEM)
4575 switch (width)
4577 case 1: return "lbu\t%0,%1";
4578 case 2: return "lhu\t%0,%1";
4579 case 4: return "lw\t%0,%1";
4580 case 8: return "ld\t%0,%1";
4583 if (src_code == CONST_INT)
4585 if (SMALL_OPERAND (INTVAL (src)) || LUI_OPERAND (INTVAL (src)))
4586 return "li\t%0,%1";
4588 if (TARGET_ZBS
4589 && SINGLE_BIT_MASK_OPERAND (INTVAL (src)))
4590 return "bseti\t%0,zero,%S1";
4592 /* Should never reach here. */
4593 abort ();
4596 if (src_code == HIGH)
4597 return "lui\t%0,%h1";
4599 if (symbolic_operand (src, VOIDmode))
4600 switch (riscv_classify_symbolic_expression (src))
4602 case SYMBOL_GOT_DISP: return "la\t%0,%1";
4603 case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
4604 case SYMBOL_PCREL: return "lla\t%0,%1";
4605 default: gcc_unreachable ();
4608 if ((src_code == REG && GP_REG_P (REGNO (src)))
4609 || (src == CONST0_RTX (mode)))
4611 if (dest_code == REG)
4613 if (GP_REG_P (REGNO (dest)))
4614 return "mv\t%0,%z1";
4616 if (FP_REG_P (REGNO (dest)))
4617 switch (width)
4619 case 2:
4620 if (TARGET_ZFHMIN || TARGET_ZFBFMIN)
4621 return "fmv.h.x\t%0,%z1";
4622	/* The high 16 bits should be all-1; otherwise the HW will treat the
4623	   value as an n-bit canonical NaN, but that doesn't matter for softfloat.  */
4624 return "fmv.s.x\t%0,%1";
4625 case 4:
4626 return "fmv.s.x\t%0,%z1";
4627 case 8:
4628 if (TARGET_64BIT)
4629 return "fmv.d.x\t%0,%z1";
4630	/* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w.  */
4631 gcc_assert (src == CONST0_RTX (mode));
4632 return "fcvt.d.w\t%0,x0";
4635 if (dest_code == MEM)
4636 switch (width)
4638 case 1: return "sb\t%z1,%0";
4639 case 2: return "sh\t%z1,%0";
4640 case 4: return "sw\t%z1,%0";
4641 case 8: return "sd\t%z1,%0";
4644 if (src_code == REG && FP_REG_P (REGNO (src)))
4646 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4647 switch (width)
4649 case 2:
4650 if (TARGET_ZFH)
4651 return "fmv.h\t%0,%1";
4652 return "fmv.s\t%0,%1";
4653 case 4:
4654 return "fmv.s\t%0,%1";
4655 case 8:
4656 return "fmv.d\t%0,%1";
4659 if (dest_code == MEM)
4660 switch (width)
4662 case 2:
4663 return "fsh\t%1,%0";
4664 case 4:
4665 return "fsw\t%1,%0";
4666 case 8:
4667 return "fsd\t%1,%0";
4670 if (dest_code == REG && FP_REG_P (REGNO (dest)))
4672 if (src_code == MEM)
4673 switch (width)
4675 case 2:
4676 return "flh\t%0,%1";
4677 case 4:
4678 return "flw\t%0,%1";
4679 case 8:
4680 return "fld\t%0,%1";
4683 if (src_code == CONST_DOUBLE && satisfies_constraint_zfli (src))
4684 switch (width)
4686 case 2:
4687 return "fli.h\t%0,%1";
4688 case 4:
4689 return "fli.s\t%0,%1";
4690 case 8:
4691 return "fli.d\t%0,%1";
4694 if (dest_code == REG && GP_REG_P (REGNO (dest)) && src_code == CONST_POLY_INT)
4696 /* We only want a single full vector register VLEN read after reload. */
4697 gcc_assert (known_eq (rtx_to_poly_int64 (src), BYTES_PER_RISCV_VECTOR));
4698 return "csrr\t%0,vlenb";
4700 gcc_unreachable ();
4703 const char *
4704 riscv_output_return ()
4706 if (cfun->machine->naked_p)
4707 return "";
4709 return "ret";
4713 /* Return true if CMP1 is a suitable second operand for integer ordering
4714 test CODE. See also the *sCC patterns in riscv.md. */
4716 static bool
4717 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
4719 switch (code)
4721 case GT:
4722 case GTU:
4723 return reg_or_0_operand (cmp1, VOIDmode);
4725 case GE:
4726 case GEU:
4727 return cmp1 == const1_rtx;
4729 case LT:
4730 case LTU:
4731 return arith_operand (cmp1, VOIDmode);
4733 case LE:
4734 return sle_operand (cmp1, VOIDmode);
4736 case LEU:
4737 return sleu_operand (cmp1, VOIDmode);
4739 default:
4740 gcc_unreachable ();
4744 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
4745 integer ordering test *CODE, or if an equivalent combination can
4746 be formed by adjusting *CODE and *CMP1. When returning true, update
4747 *CODE and *CMP1 with the chosen code and operand, otherwise leave
4748 them alone. */
4750 static bool
4751 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
4752 machine_mode mode)
4754 HOST_WIDE_INT plus_one;
4756 if (riscv_int_order_operand_ok_p (*code, *cmp1))
4757 return true;
4759 if (CONST_INT_P (*cmp1))
4760 switch (*code)
4762 case LE:
4763 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4764 if (INTVAL (*cmp1) < plus_one)
4766 *code = LT;
4767 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4768 return true;
4770 break;
4772 case LEU:
4773 plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
4774 if (plus_one != 0)
4776 *code = LTU;
4777 *cmp1 = force_reg (mode, GEN_INT (plus_one));
4778 return true;
4780 break;
4782 default:
4783 break;
4785 return false;
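/* For example, (x <= 2047) is rewritten as (x < 2048) with 2048 forced into
   a register, while an unsigned (x <= -1) is left alone because the
   incremented constant would wrap to zero (the comparison is always true).  */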
4788 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
4789 in TARGET. CMP0 and TARGET are register_operands. If INVERT_PTR
4790 is nonnull, it's OK to set TARGET to the inverse of the result and
4791 flip *INVERT_PTR instead. */
4793 static void
4794 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
4795 rtx target, rtx cmp0, rtx cmp1)
4797 machine_mode mode;
4799 /* First see if there is a RISCV instruction that can do this operation.
4800 If not, try doing the same for the inverse operation. If that also
4801 fails, force CMP1 into a register and try again. */
4802 mode = GET_MODE (cmp0);
4803 if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
4804 riscv_emit_binary (code, target, cmp0, cmp1);
4805 else
4807 enum rtx_code inv_code = reverse_condition (code);
4808 if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
4810 cmp1 = force_reg (mode, cmp1);
4811 riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
4813 else if (invert_ptr == 0)
4815 rtx inv_target = riscv_force_binary (word_mode,
4816 inv_code, cmp0, cmp1);
4817 riscv_emit_binary (EQ, target, inv_target, const0_rtx);
4819 else
4821 *invert_ptr = !*invert_ptr;
4822 riscv_emit_binary (inv_code, target, cmp0, cmp1);
4827 /* Return a register that is zero iff CMP0 and CMP1 are equal.
4828 The register will have the same mode as CMP0. */
4830 static rtx
4831 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
4833 if (cmp1 == const0_rtx)
4834 return cmp0;
4836 return expand_binop (GET_MODE (cmp0), sub_optab,
4837 cmp0, cmp1, 0, 0, OPTAB_DIRECT);
4840 /* Helper function for riscv_extend_comparands to sign-extend OP.
4841    However, if OP is an SI subreg promoted with an inner DI, such as
4842        (subreg/s/v:SI (reg/v:DI) 0)
4843    just peel off the SUBREG to get the DI, avoiding an extraneous extension.  */
4845 static void
4846 riscv_sign_extend_if_not_subreg_prom (rtx *op)
4848 if (GET_CODE (*op) == SUBREG
4849 && SUBREG_PROMOTED_VAR_P (*op)
4850 && SUBREG_PROMOTED_SIGNED_P (*op)
4851 && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant ()
4852 == GET_MODE_SIZE (word_mode)))
4853 *op = XEXP (*op, 0);
4854 else
4855 *op = gen_rtx_SIGN_EXTEND (word_mode, *op);
4858 /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
4860 static void
4861 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
4863 /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
4864 if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)).to_constant ())
4866    /* It is more profitable to zero-extend QImode values.  But not if the
4867       first operand has already been sign-extended, and the second one is
4868       a constant or has also been sign-extended.  */
4869 if (unsigned_condition (code) == code
4870 && (GET_MODE (*op0) == QImode
4871 && ! (GET_CODE (*op0) == SUBREG
4872 && SUBREG_PROMOTED_VAR_P (*op0)
4873 && SUBREG_PROMOTED_SIGNED_P (*op0)
4874 && (CONST_INT_P (*op1)
4875 || (GET_CODE (*op1) == SUBREG
4876 && SUBREG_PROMOTED_VAR_P (*op1)
4877 && SUBREG_PROMOTED_SIGNED_P (*op1))))))
4879 *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
4880 if (CONST_INT_P (*op1))
4881 *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
4882 else
4883 *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
4885 else
4887 riscv_sign_extend_if_not_subreg_prom (op0);
4889 if (*op1 != const0_rtx)
4890 riscv_sign_extend_if_not_subreg_prom (op1);
4895 /* Convert a comparison into something that can be used in a branch or
4896 conditional move. On entry, *OP0 and *OP1 are the values being
4897 compared and *CODE is the code used to compare them.
4899 Update *CODE, *OP0 and *OP1 so that they describe the final comparison.
4900 If NEED_EQ_NE_P, then only EQ or NE comparisons against zero are
4901 emitted. */
4903 static void
4904 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4905 bool need_eq_ne_p = false)
4907 if (need_eq_ne_p)
4909 rtx cmp_op0 = *op0;
4910 rtx cmp_op1 = *op1;
4911 if (*code == EQ || *code == NE)
4913 *op0 = riscv_zero_if_equal (cmp_op0, cmp_op1);
4914 *op1 = const0_rtx;
4915 return;
4917 gcc_unreachable ();
4920 if (splittable_const_int_operand (*op1, VOIDmode))
4922 HOST_WIDE_INT rhs = INTVAL (*op1);
4924 if (*code == EQ || *code == NE)
4926 /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0. */
4927 if (SMALL_OPERAND (-rhs))
4929 *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
4930 GEN_INT (-rhs));
4931 *op1 = const0_rtx;
4934 else
4936 static const enum rtx_code mag_comparisons[][2] = {
4937 {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
4940 /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000). */
4941 for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
4943 HOST_WIDE_INT new_rhs;
4944 bool increment = *code == mag_comparisons[i][0];
4945 bool decrement = *code == mag_comparisons[i][1];
4946 if (!increment && !decrement)
4947 continue;
4949 new_rhs = rhs + (increment ? 1 : -1);
4950 new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
4951 if (riscv_integer_cost (new_rhs, true) < riscv_integer_cost (rhs, true)
4952 && (rhs < 0) == (new_rhs < 0))
4954 *op1 = GEN_INT (new_rhs);
4955 *code = mag_comparisons[i][increment];
4957 break;
4962 riscv_extend_comparands (*code, op0, op1);
4964 *op0 = force_reg (word_mode, *op0);
4965 if (*op1 != const0_rtx)
4966 *op1 = force_reg (word_mode, *op1);
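/* For example, (x == 2048) cannot encode 2048 in a 12-bit immediate, so the
   code above rewrites it as (x + (-2048)) == 0: one addi followed by an
   equality test against zero.  */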
4969 /* Like riscv_emit_int_compare, but for floating-point comparisons. */
4971 static void
4972 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1,
4973 bool *invert_ptr = nullptr)
4975 rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
4976 enum rtx_code fp_code = *code;
4977 *code = NE;
4979 switch (fp_code)
4981 case UNORDERED:
4982 *code = EQ;
4983 /* Fall through. */
4985 case ORDERED:
4986 /* a == a && b == b */
4987 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4988 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4989 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4990 *op1 = const0_rtx;
4991 break;
4993 case UNEQ:
4994 /* ordered(a, b) > (a == b) */
4995 *code = EQ;
4996 tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
4997 tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
4998 *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
4999 *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
5000 break;
5002 #define UNORDERED_COMPARISON(CODE, CMP) \
5003 case CODE: \
5004 *code = EQ; \
5005 *op0 = gen_reg_rtx (word_mode); \
5006 if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT) \
5007 emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1)); \
5008 else if (GET_MODE (cmp_op0) == SFmode) \
5009 emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1)); \
5010 else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT) \
5011 emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1)); \
5012 else if (GET_MODE (cmp_op0) == DFmode) \
5013 emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1)); \
5014 else if (GET_MODE (cmp_op0) == HFmode && TARGET_64BIT) \
5015 emit_insn (gen_f##CMP##_quiethfdi4 (*op0, cmp_op0, cmp_op1)); \
5016 else if (GET_MODE (cmp_op0) == HFmode) \
5017 emit_insn (gen_f##CMP##_quiethfsi4 (*op0, cmp_op0, cmp_op1)); \
5018 else \
5019 gcc_unreachable (); \
5020 *op1 = const0_rtx; \
5021 break;
5023 case UNLT:
5024 std::swap (cmp_op0, cmp_op1);
5025 gcc_fallthrough ();
5027 UNORDERED_COMPARISON(UNGT, le)
5029 case UNLE:
5030 std::swap (cmp_op0, cmp_op1);
5031 gcc_fallthrough ();
5033 UNORDERED_COMPARISON(UNGE, lt)
5034 #undef UNORDERED_COMPARISON
5036 case NE:
5037 fp_code = EQ;
5038 if (invert_ptr != nullptr)
5039 *invert_ptr = !*invert_ptr;
5040 else
5042 cmp_op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
5043 cmp_op1 = const0_rtx;
5045 gcc_fallthrough ();
5047 case EQ:
5048 case LE:
5049 case LT:
5050 case GE:
5051 case GT:
5052 /* We have instructions for these cases. */
5053 *code = fp_code;
5054 *op0 = cmp_op0;
5055 *op1 = cmp_op1;
5056 break;
5058 case LTGT:
5059 /* (a < b) | (a > b) */
5060 tmp0 = riscv_force_binary (word_mode, LT, cmp_op0, cmp_op1);
5061 tmp1 = riscv_force_binary (word_mode, GT, cmp_op0, cmp_op1);
5062 *op0 = riscv_force_binary (word_mode, IOR, tmp0, tmp1);
5063 *op1 = const0_rtx;
5064 break;
5066 default:
5067 gcc_unreachable ();
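/* For example, ORDERED (a, b) on SFmode expands along the lines of:
     feq.s t0, fa0, fa0	# a == a; false iff a is NaN
     feq.s t1, fa1, fa1	# b == b; false iff b is NaN
     and   t0, t0, t1
   with the final test being t0 != 0.  */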
5071 /* CODE-compare OP0 and OP1. Store the result in TARGET. */
5073 void
5074 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *invert_ptr)
5076 riscv_extend_comparands (code, &op0, &op1);
5077 op0 = force_reg (word_mode, op0);
5079 /* For sub-word targets on rv64, do the computation in DImode
5080 then extract the lowpart for the final target, marking it
5081 as sign extended. Note that it's also properly zero extended,
5082 but it's probably more profitable to expose it as sign extended. */
5083 rtx t;
5084 if (TARGET_64BIT && GET_MODE (target) == SImode)
5085 t = gen_reg_rtx (DImode);
5086 else
5087 t = target;
5089 if (code == EQ || code == NE)
5091 rtx zie = riscv_zero_if_equal (op0, op1);
5092 riscv_emit_binary (code, t, zie, const0_rtx);
5094 else
5095 riscv_emit_int_order_test (code, invert_ptr, t, op0, op1);
5097 if (t != target)
5099 t = gen_lowpart (SImode, t);
5100 SUBREG_PROMOTED_VAR_P (t) = 1;
5101 SUBREG_PROMOTED_SET (t, SRP_SIGNED);
5102 emit_move_insn (target, t);
5106 /* Like riscv_expand_int_scc, but for floating-point comparisons. */
5108 void
5109 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1,
5110 bool *invert_ptr)
5112 riscv_emit_float_compare (&code, &op0, &op1, invert_ptr);
5114 machine_mode mode = GET_MODE (target);
5115 if (mode != word_mode)
5117 rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
5118 riscv_emit_set (target, lowpart_subreg (mode, cmp, word_mode));
5120 else
5121 riscv_emit_binary (code, target, op0, op1);
5124 /* Jump to LABEL if (CODE OP0 OP1) holds. */
5126 void
5127 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
5129 if (FLOAT_MODE_P (GET_MODE (op1)))
5130 riscv_emit_float_compare (&code, &op0, &op1);
5131 else
5132 riscv_emit_int_compare (&code, &op0, &op1);
5134 if (FLOAT_MODE_P (GET_MODE (op0)))
5136 op0 = riscv_force_binary (word_mode, code, op0, op1);
5137 op1 = const0_rtx;
5138 code = NE;
5141 rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
5142 emit_jump_insn (gen_condjump (condition, label));
5145 /* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
5146    Return false if expansion failed.  */
5148 bool
5149 riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
5151 machine_mode mode = GET_MODE (dest);
5152 rtx_code code = GET_CODE (op);
5153 rtx op0 = XEXP (op, 0);
5154 rtx op1 = XEXP (op, 1);
5156 if (((TARGET_ZICOND_LIKE
5157 || (arith_operand (cons, mode) && arith_operand (alt, mode)))
5158 && (GET_MODE_CLASS (mode) == MODE_INT))
5159 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
5161 machine_mode mode0 = GET_MODE (op0);
5162 machine_mode mode1 = GET_MODE (op1);
5164 /* An integer comparison must be comparing WORD_MODE objects. We
5165 must enforce that so that we don't strip away a sign_extension
5166 thinking it is unnecessary. We might consider using
5167 riscv_extend_operands if they are not already properly extended. */
5168 if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
5169 || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
5170 return false;
5172 /* In the fallback generic case use MODE rather than WORD_MODE for
5173 the output of the SCC instruction, to match the mode of the NEG
5174 operation below. The output of SCC is 0 or 1 boolean, so it is
5175 valid for input in any scalar integer mode. */
5176 rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
5177 || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
5178 ? word_mode : mode);
5179 bool invert = false;
5181 /* Canonicalize the comparison. It must be an equality comparison
5182 of integer operands, or with SFB it can be any comparison of
5183 integer operands. If it isn't, then emit an SCC instruction
5184 so that we can then use an equality comparison against zero. */
5185 if ((!TARGET_SFB_ALU && !equality_operator (op, VOIDmode))
5186 || !INTEGRAL_MODE_P (mode0))
5188 bool *invert_ptr = nullptr;
5190 /* If riscv_expand_int_scc inverts the condition, then it will
5191 flip the value of INVERT. We need to know where so that
5192 we can adjust it for our needs. */
5193 if (code == LE || code == LEU || code == GE || code == GEU)
5194 invert_ptr = &invert;
5196 /* Emit an SCC-like instruction into a temporary so that we can
5197 use an EQ/NE comparison. We can support both FP and integer
5198 conditional moves. */
5199 if (INTEGRAL_MODE_P (mode0))
5200 riscv_expand_int_scc (tmp, code, op0, op1, invert_ptr);
5201 else if (FLOAT_MODE_P (mode0)
5202 && fp_scc_comparison (op, GET_MODE (op)))
5203 riscv_expand_float_scc (tmp, code, op0, op1, &invert);
5204 else
5205 return false;
5207 op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
5209 /* We've generated a new comparison. Update the local variables. */
5210 code = GET_CODE (op);
5211 op0 = XEXP (op, 0);
5212 op1 = XEXP (op, 1);
5214 else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
5215 riscv_expand_int_scc (tmp, code, op0, op1, &invert);
5217 if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
5219 riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
5220 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5222      /* The expander is a bit loose in its specification of the true
5223	 arm of the conditional move.  That allows us to support more
5224	 cases for extensions which are more general than SFB.  But it
5225	 does mean we need to force CONS into a register at this point.  */
5226 cons = force_reg (mode, cons);
5227 /* With XTheadCondMov we need to force ALT into a register too. */
5228 alt = force_reg (mode, alt);
5229 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
5230 cons, alt)));
5231 return true;
5233 else if (!TARGET_ZICOND_LIKE)
5235 if (invert)
5236 std::swap (cons, alt);
5238 rtx reg1 = gen_reg_rtx (mode);
5239 rtx reg2 = gen_reg_rtx (mode);
5240 rtx reg3 = gen_reg_rtx (mode);
5241 rtx reg4 = gen_reg_rtx (mode);
5243 riscv_emit_unary (NEG, reg1, tmp);
5244 riscv_emit_binary (AND, reg2, reg1, cons);
5245 riscv_emit_unary (NOT, reg3, reg1);
5246 riscv_emit_binary (AND, reg4, reg3, alt);
5247 riscv_emit_binary (IOR, dest, reg2, reg4);
5248 return true;
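      /* A sketch of the branchless fallback just emitted, with TMP in {0, 1}:
	   reg1 = -tmp		(all-zeros or all-ones mask)
	   reg2 = reg1 & cons
	   reg3 = ~reg1
	   reg4 = reg3 & alt
	   dest = reg2 | reg4	(CONS when tmp == 1, ALT when tmp == 0)  */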
5250 /* 0, reg or 0, imm */
5251 else if (cons == CONST0_RTX (mode)
5252 && (REG_P (alt)
5253 || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
5255 riscv_emit_int_compare (&code, &op0, &op1, true);
5256 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5257 alt = force_reg (mode, alt);
5258 emit_insn (gen_rtx_SET (dest,
5259 gen_rtx_IF_THEN_ELSE (mode, cond,
5260 cons, alt)));
5261 return true;
5263 /* imm, imm */
5264 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
5265 && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
5267 riscv_emit_int_compare (&code, &op0, &op1, true);
5268 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5269 HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
5270 alt = force_reg (mode, gen_int_mode (t, mode));
5271 emit_insn (gen_rtx_SET (dest,
5272 gen_rtx_IF_THEN_ELSE (mode, cond,
5273 CONST0_RTX (mode),
5274 alt)));
5275 /* CONS might not fit into a signed 12 bit immediate suitable
5276 for an addi instruction. If that's the case, force it
5277 into a register. */
5278 if (!SMALL_OPERAND (INTVAL (cons)))
5279 cons = force_reg (mode, cons);
5280 riscv_emit_binary (PLUS, dest, dest, cons);
5281 return true;
5283 /* imm, reg */
5284 else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
5286 /* Optimize for register value of 0. */
5287 if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
5289 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5290 cons = force_reg (mode, cons);
5291 emit_insn (gen_rtx_SET (dest,
5292 gen_rtx_IF_THEN_ELSE (mode, cond,
5293 cons, alt)));
5294 return true;
5297 riscv_emit_int_compare (&code, &op0, &op1, true);
5298 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5300 rtx temp1 = gen_reg_rtx (mode);
5301 rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
5303 /* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
5304 suitable for an addi instruction. If that's the case, force it
5305 into a register. */
5306 if (!SMALL_OPERAND (INTVAL (temp2)))
5307 temp2 = force_reg (mode, temp2);
5308 if (!SMALL_OPERAND (INTVAL (cons)))
5309 cons = force_reg (mode, cons);
5311 riscv_emit_binary (PLUS, temp1, alt, temp2);
5312 emit_insn (gen_rtx_SET (dest,
5313 gen_rtx_IF_THEN_ELSE (mode, cond,
5314 CONST0_RTX (mode),
5315 temp1)));
5316 riscv_emit_binary (PLUS, dest, dest, cons);
5317 return true;
5319 /* reg, 0 or imm, 0 */
5320 else if ((REG_P (cons)
5321 || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
5322 && alt == CONST0_RTX (mode))
5324 riscv_emit_int_compare (&code, &op0, &op1, true);
5325 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5326 cons = force_reg (mode, cons);
5327 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
5328 cons, alt)));
5329 return true;
5331 /* reg, imm */
5332 else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
5334 /* Optimize for register value of 0. */
5335 if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
5337 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5338 alt = force_reg (mode, alt);
5339 emit_insn (gen_rtx_SET (dest,
5340 gen_rtx_IF_THEN_ELSE (mode, cond,
5341 cons, alt)));
5342 return true;
5345 riscv_emit_int_compare (&code, &op0, &op1, true);
5346 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5348 rtx temp1 = gen_reg_rtx (mode);
5349 rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
5351 /* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
5352 suitable for an addi instruction. If that's the case, force it
5353 into a register. */
5354 if (!SMALL_OPERAND (INTVAL (temp2)))
5355 temp2 = force_reg (mode, temp2);
5356 if (!SMALL_OPERAND (INTVAL (alt)))
5357 alt = force_reg (mode, alt);
5359 riscv_emit_binary (PLUS, temp1, cons, temp2);
5360 emit_insn (gen_rtx_SET (dest,
5361 gen_rtx_IF_THEN_ELSE (mode, cond,
5362 temp1,
5363 CONST0_RTX (mode))));
5364 riscv_emit_binary (PLUS, dest, dest, alt);
5365 return true;
5367 /* reg, reg */
5368 else if (REG_P (cons) && REG_P (alt))
5370 if (((code == EQ && rtx_equal_p (cons, op0))
5371 || (code == NE && rtx_equal_p (alt, op0)))
5372 && op1 == CONST0_RTX (mode))
5374 rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5375 alt = force_reg (mode, alt);
5376 emit_insn (gen_rtx_SET (dest,
5377 gen_rtx_IF_THEN_ELSE (mode, cond,
5378 cons, alt)));
5379 return true;
5382 rtx reg1 = gen_reg_rtx (mode);
5383 rtx reg2 = gen_reg_rtx (mode);
5384 riscv_emit_int_compare (&code, &op0, &op1, true);
5385 rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
5386 rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
5387 GET_MODE (op0), op0, op1);
5388 emit_insn (gen_rtx_SET (reg2,
5389 gen_rtx_IF_THEN_ELSE (mode, cond2,
5390 CONST0_RTX (mode),
5391 cons)));
5392 emit_insn (gen_rtx_SET (reg1,
5393 gen_rtx_IF_THEN_ELSE (mode, cond1,
5394 CONST0_RTX (mode),
5395 alt)));
5396 riscv_emit_binary (PLUS, dest, reg1, reg2);
5397 return true;
5401 return false;
5404 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
5405 least PARM_BOUNDARY bits of alignment, but will be given anything up
5406 to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
5408 static unsigned int
5409 riscv_function_arg_boundary (machine_mode mode, const_tree type)
5411 unsigned int alignment;
5413 /* Use natural alignment if the type is not aggregate data. */
5414 if (type && !AGGREGATE_TYPE_P (type))
5415 alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
5416 else
5417 alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
5419 return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
5422 /* If MODE represents an argument that can be passed or returned in
5423 floating-point registers, return the number of registers, else 0. */
5425 static unsigned
5426 riscv_pass_mode_in_fpr_p (machine_mode mode)
5428 if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
5430 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5431 return 1;
5433 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5434 return 2;
5437 return 0;
5440 typedef struct {
5441 const_tree type;
5442 HOST_WIDE_INT offset;
5443 } riscv_aggregate_field;
5445 /* Identify subfields of aggregates that are candidates for passing in
5446 floating-point registers. */
5448 static int
5449 riscv_flatten_aggregate_field (const_tree type,
5450 riscv_aggregate_field fields[2],
5451 int n, HOST_WIDE_INT offset,
5452 bool ignore_zero_width_bit_field_p)
5454 switch (TREE_CODE (type))
5456 case RECORD_TYPE:
5457      /* Can't handle incomplete types or sizes that are not fixed.  */
5458 if (!COMPLETE_TYPE_P (type)
5459 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5460 || !tree_fits_uhwi_p (TYPE_SIZE (type)))
5461 return -1;
5463 for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
5464 if (TREE_CODE (f) == FIELD_DECL)
5466 if (!TYPE_P (TREE_TYPE (f)))
5467 return -1;
5469 /* The C++ front end strips zero-length bit-fields from structs.
5470 So we need to ignore them in the C front end to make C code
5471 compatible with C++ code. */
5472 if (ignore_zero_width_bit_field_p
5473 && DECL_BIT_FIELD (f)
5474 && (DECL_SIZE (f) == NULL_TREE
5475 || integer_zerop (DECL_SIZE (f))))
5477 else
5479 HOST_WIDE_INT pos = offset + int_byte_position (f);
5480 n = riscv_flatten_aggregate_field (TREE_TYPE (f),
5481 fields, n, pos,
5482 ignore_zero_width_bit_field_p);
5484 if (n < 0)
5485 return -1;
5487 return n;
5489 case ARRAY_TYPE:
5491 HOST_WIDE_INT n_elts;
5492 riscv_aggregate_field subfields[2];
5493 tree index = TYPE_DOMAIN (type);
5494 tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
5495 int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
5496 subfields, 0, offset,
5497 ignore_zero_width_bit_field_p);
5499	/* Can't handle incomplete types or sizes that are not fixed.  */
5500 if (n_subfields <= 0
5501 || !COMPLETE_TYPE_P (type)
5502 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5503 || !index
5504 || !TYPE_MAX_VALUE (index)
5505 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5506 || !TYPE_MIN_VALUE (index)
5507 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5508 || !tree_fits_uhwi_p (elt_size))
5509 return -1;
5511 n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5512 - tree_to_uhwi (TYPE_MIN_VALUE (index));
5513 gcc_assert (n_elts >= 0);
5515 for (HOST_WIDE_INT i = 0; i < n_elts; i++)
5516 for (int j = 0; j < n_subfields; j++)
5518 if (n >= 2)
5519 return -1;
5521 fields[n] = subfields[j];
5522 fields[n++].offset += i * tree_to_uhwi (elt_size);
5525 return n;
5528 case COMPLEX_TYPE:
5530	/* A complex type needs to consume 2 fields, so N must be 0.  */
5531 if (n != 0)
5532 return -1;
5534 HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))).to_constant ();
5536 if (elt_size <= UNITS_PER_FP_ARG)
5538 fields[0].type = TREE_TYPE (type);
5539 fields[0].offset = offset;
5540 fields[1].type = TREE_TYPE (type);
5541 fields[1].offset = offset + elt_size;
5543 return 2;
5546 return -1;
5549 default:
5550 if (n < 2
5551 && ((SCALAR_FLOAT_TYPE_P (type)
5552 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_FP_ARG)
5553 || (INTEGRAL_TYPE_P (type)
5554 && GET_MODE_SIZE (TYPE_MODE (type)).to_constant () <= UNITS_PER_WORD)))
5556 fields[n].type = type;
5557 fields[n].offset = offset;
5558 return n + 1;
5560 else
5561 return -1;
5565 /* Identify candidate aggregates for passing in floating-point registers.
5566 Candidates have at most two fields after flattening. */
5568 static int
5569 riscv_flatten_aggregate_argument (const_tree type,
5570 riscv_aggregate_field fields[2],
5571 bool ignore_zero_width_bit_field_p)
5573 if (!type || TREE_CODE (type) != RECORD_TYPE)
5574 return -1;
5576 return riscv_flatten_aggregate_field (type, fields, 0, 0,
5577 ignore_zero_width_bit_field_p);
5580 /* See whether TYPE is a record whose fields should be returned in one or
5581 two floating-point registers. If so, populate FIELDS accordingly. */
5583 static unsigned
5584 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
5585 riscv_aggregate_field fields[2])
5587 static int warned = 0;
5589 /* This is the old ABI, which differs for C++ and C. */
5590 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
5591 for (int i = 0; i < n_old; i++)
5592 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
5594 n_old = -1;
5595 break;
5598 /* This is the new ABI, which is the same for C++ and C. */
5599 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
5600 for (int i = 0; i < n_new; i++)
5601 if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
5603 n_new = -1;
5604 break;
5607 if ((n_old != n_new) && (warned == 0))
5609 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
5610 "bit-fields changed in GCC 10");
5611 warned = 1;
5614 return n_new > 0 ? n_new : 0;
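/* For example, struct { float x; float y; } flattens to two SFmode fields,
   so it is eligible to be passed or returned in a pair of FPRs under the
   hard-float ABIs.  */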
5617 /* See whether TYPE is a record whose fields should be returned in one
5618    floating-point register and one integer register.  If so, populate
5619    FIELDS accordingly.  */
5621 static bool
5622 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
5623 riscv_aggregate_field fields[2])
5625 static int warned = 0;
5627 /* This is the old ABI, which differs for C++ and C. */
5628 unsigned num_int_old = 0, num_float_old = 0;
5629 int n_old = riscv_flatten_aggregate_argument (type, fields, false);
5630 for (int i = 0; i < n_old; i++)
5632 num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
5633 num_int_old += INTEGRAL_TYPE_P (fields[i].type);
5636 /* This is the new ABI, which is the same for C++ and C. */
5637 unsigned num_int_new = 0, num_float_new = 0;
5638 int n_new = riscv_flatten_aggregate_argument (type, fields, true);
5639 for (int i = 0; i < n_new; i++)
5641 num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type);
5642 num_int_new += INTEGRAL_TYPE_P (fields[i].type);
5645 if (((num_int_old == 1 && num_float_old == 1
5646 && (num_int_old != num_int_new || num_float_old != num_float_new))
5647 || (num_int_new == 1 && num_float_new == 1
5648 && (num_int_old != num_int_new || num_float_old != num_float_new)))
5649 && (warned == 0))
5651 warning (OPT_Wpsabi, "ABI for flattened struct with zero-length "
5652 "bit-fields changed in GCC 10");
5653 warned = 1;
5656 return num_int_new == 1 && num_float_new == 1;
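/* For example, struct { float f; int i; } flattens to one floating-point
   field and one integer field, so it is passed in one FPR plus one GPR.  */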
5659 /* Return the representation of an argument passed or returned in an FPR
5660 when the value has mode VALUE_MODE and the type has TYPE_MODE. The
5661 two modes may be different for structures like:
5663 struct __attribute__((packed)) foo { float f; }
5665 where the SFmode value "f" is passed in REGNO but the struct itself
5666 has mode BLKmode. */
5668 static rtx
5669 riscv_pass_fpr_single (machine_mode type_mode, unsigned regno,
5670 machine_mode value_mode,
5671 HOST_WIDE_INT offset)
5673 rtx x = gen_rtx_REG (value_mode, regno);
5675 if (type_mode != value_mode)
5677 x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset));
5678 x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
5680 return x;
5683 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
5684 MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and
5685 byte offset for the first value, likewise MODE2 and OFFSET2 for the
5686 second value. */
5688 static rtx
5689 riscv_pass_fpr_pair (machine_mode mode, unsigned regno1,
5690 machine_mode mode1, HOST_WIDE_INT offset1,
5691 unsigned regno2, machine_mode mode2,
5692 HOST_WIDE_INT offset2)
5694 return gen_rtx_PARALLEL
5695 (mode,
5696 gen_rtvec (2,
5697 gen_rtx_EXPR_LIST (VOIDmode,
5698 gen_rtx_REG (mode1, regno1),
5699 GEN_INT (offset1)),
5700 gen_rtx_EXPR_LIST (VOIDmode,
5701 gen_rtx_REG (mode2, regno2),
5702 GEN_INT (offset2))));
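/* Illustratively, for struct { float x; float y; } passed in fa0/fa1
   this builds RTL of the shape

       (parallel [(expr_list (reg:SF fa0) (const_int 0))
		  (expr_list (reg:SF fa1) (const_int 4))])

   where each const_int is the byte offset of the corresponding field.  */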
5705 static rtx
5706 riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
5707 unsigned gpr_base)
5709 gcc_assert (riscv_v_ext_vls_mode_p (mode));
5711 unsigned count = 0;
5712 unsigned regnum = 0;
5713 machine_mode gpr_mode = VOIDmode;
5714 unsigned vls_size = GET_MODE_SIZE (mode).to_constant ();
5715 unsigned gpr_size = GET_MODE_SIZE (Xmode);
5717 if (IN_RANGE (vls_size, 0, gpr_size * 2))
5719 count = riscv_v_vls_mode_aggregate_gpr_count (vls_size, gpr_size);
5721 if (count + info->gpr_offset <= MAX_ARGS_IN_REGISTERS)
5723 regnum = gpr_base + info->gpr_offset;
5724 info->num_gprs = count;
5725 gpr_mode = riscv_v_vls_to_gpr_mode (vls_size);
5729 if (!regnum)
5730 return NULL_RTX; /* Return NULL_RTX if we cannot find a suitable reg. */
5732 gcc_assert (gpr_mode != VOIDmode);
5734 rtx reg = gen_rtx_REG (gpr_mode, regnum);
5735 rtx x = gen_rtx_EXPR_LIST (VOIDmode, reg, CONST0_RTX (gpr_mode));
5737 return gen_rtx_PARALLEL (mode, gen_rtvec (1, x));
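/* E.g. on rv64 (gpr_size == 8) a 16-byte VLS vector satisfies
   vls_size <= 2 * gpr_size and is passed in two consecutive GPRs, while
   a 32-byte VLS value gets NULL_RTX here and is passed by reference
   instead (an illustrative reading of the size check above).  */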
5740 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5741 for a call to a function whose data type is FNTYPE.
5742 For a library call, FNTYPE is 0. */
5744 void
5745 riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
5747 memset (cum, 0, sizeof (*cum));
5749 if (fntype)
5750 cum->variant_cc = (riscv_cc) fntype_abi (fntype).id ();
5751 else
5752 cum->variant_cc = RISCV_CC_BASE;
5755 /* Return true if TYPE is a vector type that can be passed in vector registers. */
5758 static bool
5759 riscv_vector_type_p (const_tree type)
5761 /* Currently, only the builtin scalable vector types are allowed; in the
5762 future, more vector types may be allowed, such as the GNU vector types. */
5763 return riscv_vector::builtin_type_p (type);
5766 static unsigned int
5767 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode);
5769 /* Subroutine of riscv_get_arg_info. */
5771 static rtx
5772 riscv_get_vector_arg (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5773 machine_mode mode, bool return_p)
5775 gcc_assert (riscv_v_ext_mode_p (mode));
5777 info->mr_offset = cum->num_mrs;
5778 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
5780 /* For scalable mask return value. */
5781 if (return_p)
5782 return gen_rtx_REG (mode, V_REG_FIRST);
5784 /* For the first scalable mask argument. */
5785 if (info->mr_offset < MAX_ARGS_IN_MASK_REGISTERS)
5787 info->num_mrs = 1;
5788 return gen_rtx_REG (mode, V_REG_FIRST);
5790 else
5792 /* The remaining scalable mask arguments are treated as scalable data
5793 arguments. */
5797 /* The number and alignment of vector registers needed for this scalable vector
5798 argument. When the mode size is less than a full vector, we use one vector
5799 register to pass it. Just call TARGET_HARD_REGNO_NREGS for the number
5800 information. */
5801 int nregs = riscv_hard_regno_nregs (V_ARG_FIRST, mode);
5802 int LMUL = riscv_v_ext_tuple_mode_p (mode)
5803 ? nregs / riscv_vector::get_nf (mode)
5804 : nregs;
5805 int arg_reg_start = V_ARG_FIRST - V_REG_FIRST;
5806 int arg_reg_end = V_ARG_LAST - V_REG_FIRST;
5807 int aligned_reg_start = ROUND_UP (arg_reg_start, LMUL);
5809 /* For scalable data and scalable tuple return value. */
5810 if (return_p)
5811 return gen_rtx_REG (mode, aligned_reg_start + V_REG_FIRST);
5813 /* Iterate through the USED_VRS array to find vector register groups that have
5814 not been allocated and whose first register is aligned to LMUL. */
5815 for (int i = aligned_reg_start; i + nregs - 1 <= arg_reg_end; i += LMUL)
5817 /* The index in USED_VRS array. */
5818 int idx = i - arg_reg_start;
5819 /* Find the first register unused. */
5820 if (!cum->used_vrs[idx])
5822 bool find_set = true;
5823 /* Ensure there are NREGS continuous unused registers. */
5824 for (int j = 1; j < nregs; j++)
5825 if (cum->used_vrs[idx + j])
5827 find_set = false;
5828 /* Update I to the last aligned register that cannot
5829 be used; the next iteration will then advance I by
5830 LMUL. */
5831 i += (j / LMUL) * LMUL;
5832 break;
5835 if (find_set)
5837 info->num_vrs = nregs;
5838 info->vr_offset = idx;
5839 return gen_rtx_REG (mode, i + V_REG_FIRST);
5844 return NULL_RTX;
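/* A sketch of the search above: an LMUL=2 argument needs two aligned
   registers, so the loop probes v8, v10, ... within the v8-v23 argument
   range, skipping any group that overlaps a register already marked in
   USED_VRS; if no group fits, NULL_RTX makes the argument go by
   reference.  */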
5847 /* Fill INFO with information about a single argument, and return an RTL
5848 pattern to pass or return the argument. Return NULL_RTX if the argument
5849 cannot be passed or returned in registers; it may then be passed by
5850 reference or through the stack. CUM is the cumulative state for earlier arguments.
5851 MODE is the mode of this argument and TYPE is its type (if known). NAMED is
5852 true if this is a named (fixed) argument rather than a variable one. RETURN_P
5853 is true if returning the argument, or false if passing the argument. */
5855 static rtx
5856 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
5857 machine_mode mode, const_tree type, bool named,
5858 bool return_p)
5860 unsigned num_bytes, num_words;
5861 unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
5862 unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
5863 unsigned alignment = riscv_function_arg_boundary (mode, type);
5865 memset (info, 0, sizeof (*info));
5866 info->gpr_offset = cum->num_gprs;
5867 info->fpr_offset = cum->num_fprs;
5869 /* Passed by reference when the scalable vector argument is anonymous. */
5870 if (riscv_v_ext_mode_p (mode) && !named)
5871 return NULL_RTX;
5873 if (named)
5875 riscv_aggregate_field fields[2];
5876 unsigned fregno = fpr_base + info->fpr_offset;
5877 unsigned gregno = gpr_base + info->gpr_offset;
5879 /* Pass one- or two-element floating-point aggregates in FPRs. */
5880 if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
5881 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5882 switch (info->num_fprs)
5884 case 1:
5885 return riscv_pass_fpr_single (mode, fregno,
5886 TYPE_MODE (fields[0].type),
5887 fields[0].offset);
5889 case 2:
5890 return riscv_pass_fpr_pair (mode, fregno,
5891 TYPE_MODE (fields[0].type),
5892 fields[0].offset,
5893 fregno + 1,
5894 TYPE_MODE (fields[1].type),
5895 fields[1].offset);
5897 default:
5898 gcc_unreachable ();
5901 /* Pass real and complex floating-point numbers in FPRs. */
5902 if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
5903 && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
5904 switch (GET_MODE_CLASS (mode))
5906 case MODE_FLOAT:
5907 return gen_rtx_REG (mode, fregno);
5909 case MODE_COMPLEX_FLOAT:
5910 return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
5911 fregno + 1, GET_MODE_INNER (mode),
5912 GET_MODE_UNIT_SIZE (mode));
5914 default:
5915 gcc_unreachable ();
5918 /* Pass structs with one float and one integer in an FPR and a GPR. */
5919 if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
5920 && info->gpr_offset < MAX_ARGS_IN_REGISTERS
5921 && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
5923 info->num_gprs = 1;
5924 info->num_fprs = 1;
5926 if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
5927 std::swap (fregno, gregno);
5929 return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
5930 fields[0].offset,
5931 gregno, TYPE_MODE (fields[1].type),
5932 fields[1].offset);
5935 /* For scalable vector arguments. */
5936 if (riscv_vector_type_p (type) && riscv_v_ext_mode_p (mode))
5937 return riscv_get_vector_arg (info, cum, mode, return_p);
5939 /* For VLS mode aggregates passed in GPRs. */
5940 if (riscv_v_ext_vls_mode_p (mode))
5941 return riscv_pass_vls_aggregate_in_gpr (info, mode, gpr_base);
5944 /* Work out the size of the argument. */
5945 num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode).to_constant ();
5946 num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5948 /* Doubleword-aligned varargs start on an even register boundary. */
5949 if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
5950 info->gpr_offset += info->gpr_offset & 1;
5952 /* Partition the argument between registers and stack. */
5953 info->num_fprs = 0;
5954 info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
5955 info->stack_p = (num_words - info->num_gprs) != 0;
5957 if (info->num_gprs || return_p)
5958 return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
5960 return NULL_RTX;
5963 /* Implement TARGET_FUNCTION_ARG. */
5965 static rtx
5966 riscv_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5968 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5969 struct riscv_arg_info info;
5971 if (arg.end_marker_p ())
5972 /* Return the calling convention that is used by the current function. */
5973 return gen_int_mode (cum->variant_cc, SImode);
5975 return riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5978 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
5980 static void
5981 riscv_function_arg_advance (cumulative_args_t cum_v,
5982 const function_arg_info &arg)
5984 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5985 struct riscv_arg_info info;
5987 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
5989 /* Set the corresponding register in USED_VRS to used status. */
5990 for (unsigned int i = 0; i < info.num_vrs; i++)
5992 gcc_assert (!cum->used_vrs[info.vr_offset + i]);
5993 cum->used_vrs[info.vr_offset + i] = true;
5996 if ((info.num_vrs > 0 || info.num_mrs > 0) && cum->variant_cc != RISCV_CC_V)
5998 error ("RVV type %qT cannot be passed to an unprototyped function",
5999 arg.type);
6000 /* Avoid repeating the message. */
6001 cum->variant_cc = RISCV_CC_V;
6004 /* Advance the register count. This has the effect of setting
6005 num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
6006 argument required us to skip the final GPR and pass the whole
6007 argument on the stack. */
6008 cum->num_fprs = info.fpr_offset + info.num_fprs;
6009 cum->num_gprs = info.gpr_offset + info.num_gprs;
6010 cum->num_mrs = info.mr_offset + info.num_mrs;
6013 /* Implement TARGET_ARG_PARTIAL_BYTES. */
6015 static int
6016 riscv_arg_partial_bytes (cumulative_args_t cum,
6017 const function_arg_info &generic_arg)
6019 struct riscv_arg_info arg;
6021 riscv_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode,
6022 generic_arg.type, generic_arg.named, false);
6023 return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
6026 /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
6027 VALTYPE is the return type and MODE is VOIDmode. For libcalls,
6028 VALTYPE is null and MODE is the mode of the return value. */
6030 rtx
6031 riscv_function_value (const_tree type, const_tree func, machine_mode mode)
6033 struct riscv_arg_info info;
6034 CUMULATIVE_ARGS args;
6036 if (type)
6038 int unsigned_p = TYPE_UNSIGNED (type);
6040 mode = TYPE_MODE (type);
6042 /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
6043 return values, promote the mode here too. */
6044 mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
6047 memset (&args, 0, sizeof args);
6049 return riscv_get_arg_info (&info, &args, mode, type, true, true);
6052 /* Implement TARGET_PASS_BY_REFERENCE. */
6054 static bool
6055 riscv_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
6057 HOST_WIDE_INT size = arg.type_size_in_bytes ().to_constant ();
6058 struct riscv_arg_info info;
6059 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6061 /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we
6062 never pass variadic arguments in floating-point and vector registers,
6063 so we can avoid the call to riscv_get_arg_info in this case. */
6064 if (cum != NULL)
6066 riscv_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false);
6068 /* Don't pass by reference if we can use a floating-point register. */
6069 if (info.num_fprs)
6070 return false;
6072 /* Don't pass by reference if we can use general register(s) for vls. */
6073 if (info.num_gprs && riscv_v_ext_vls_mode_p (arg.mode))
6074 return false;
6076 /* Don't pass by reference if we can use vector register groups. */
6077 if (info.num_vrs > 0 || info.num_mrs > 0)
6078 return false;
6081 /* Passed by reference when:
6082 1. The scalable vector argument is anonymous.
6083 2. Args cannot be passed through vector registers. */
6084 if (riscv_v_ext_mode_p (arg.mode))
6085 return true;
6087 /* Pass by reference if the data do not fit in two integer registers. */
6088 return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
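/* E.g. on rv64 (UNITS_PER_WORD == 8) a 24-byte struct fails the final
   IN_RANGE check and is passed by reference, while a 16-byte struct can
   still travel in up to two GPRs.  */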
6091 /* Implement TARGET_RETURN_IN_MEMORY. */
6093 static bool
6094 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
6096 CUMULATIVE_ARGS args;
6097 cumulative_args_t cum = pack_cumulative_args (&args);
6099 /* The rules for returning in memory are the same as for passing the
6100 first named argument by reference. */
6101 memset (&args, 0, sizeof args);
6102 function_arg_info arg (const_cast<tree> (type), /*named=*/true);
6103 return riscv_pass_by_reference (cum, arg);
6106 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6108 static void
6109 riscv_setup_incoming_varargs (cumulative_args_t cum,
6110 const function_arg_info &arg,
6111 int *pretend_size ATTRIBUTE_UNUSED, int no_rtl)
6113 CUMULATIVE_ARGS local_cum;
6114 int gp_saved;
6116 /* The caller has advanced CUM up to, but not beyond, the last named
6117 argument. Advance a local copy of CUM past the last "real" named
6118 argument, to find out how many registers are left over. */
6119 local_cum = *get_cumulative_args (cum);
6120 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
6121 || arg.type != NULL_TREE)
6122 riscv_function_arg_advance (pack_cumulative_args (&local_cum), arg);
6124 /* Find out how many registers we need to save. */
6125 gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
6127 if (!no_rtl && gp_saved > 0)
6129 rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6130 REG_PARM_STACK_SPACE (cfun->decl)
6131 - gp_saved * UNITS_PER_WORD);
6132 rtx mem = gen_frame_mem (BLKmode, ptr);
6133 set_mem_alias_set (mem, get_varargs_alias_set ());
6135 move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
6136 mem, gp_saved);
6138 if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
6139 cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
6142 /* Return the descriptor of the Standard Vector Calling Convention Variant. */
6144 static const predefined_function_abi &
6145 riscv_v_abi ()
6147 predefined_function_abi &v_abi = function_abis[RISCV_CC_V];
6148 if (!v_abi.initialized_p ())
6150 HARD_REG_SET full_reg_clobbers
6151 = default_function_abi.full_reg_clobbers ();
6152 /* Callee-saved vector registers: v1-v7, v24-v31. */
6153 for (int regno = V_REG_FIRST + 1; regno <= V_REG_FIRST + 7; regno += 1)
6154 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
6155 for (int regno = V_REG_FIRST + 24; regno <= V_REG_FIRST + 31; regno += 1)
6156 CLEAR_HARD_REG_BIT (full_reg_clobbers, regno);
6157 v_abi.initialize (RISCV_CC_V, full_reg_clobbers);
6159 return v_abi;
6162 static bool
6163 riscv_vector_int_type_p (const_tree type)
6165 machine_mode mode = TYPE_MODE (type);
6167 if (VECTOR_MODE_P (mode))
6168 return INTEGRAL_MODE_P (GET_MODE_INNER (mode));
6170 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
6171 return strstr (name, "int") != NULL || strstr (name, "uint") != NULL;
6174 bool
6175 riscv_vector_float_type_p (const_tree type)
6177 if (!riscv_vector_type_p (type))
6178 return false;
6180 machine_mode mode = TYPE_MODE (type);
6182 if (VECTOR_MODE_P (mode))
6183 return FLOAT_MODE_P (GET_MODE_INNER (mode));
6185 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
6186 return strstr (name, "vfloat") != NULL;
6189 static int
6190 riscv_vector_element_bitsize (const_tree type)
6192 machine_mode mode = TYPE_MODE (type);
6194 if (VECTOR_MODE_P (mode))
6195 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6197 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
6199 if (strstr (name, "bool") != NULL)
6200 return 1;
6201 else if (strstr (name, "int8") != NULL)
6202 return 8;
6203 else if (strstr (name, "int16") != NULL || strstr (name, "float16") != NULL)
6204 return 16;
6205 else if (strstr (name, "int32") != NULL || strstr (name, "float32") != NULL)
6206 return 32;
6207 else if (strstr (name, "int64") != NULL || strstr (name, "float64") != NULL)
6208 return 64;
6210 gcc_unreachable ();
6213 static int
6214 riscv_vector_required_min_vlen (const_tree type)
6216 machine_mode mode = TYPE_MODE (type);
6218 if (riscv_v_ext_mode_p (mode))
6219 return TARGET_MIN_VLEN;
6221 int element_bitsize = riscv_vector_element_bitsize (type);
6222 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
6224 if (strstr (name, "bool64") != NULL)
6225 return element_bitsize * 64;
6226 else if (strstr (name, "bool32") != NULL)
6227 return element_bitsize * 32;
6228 else if (strstr (name, "bool16") != NULL)
6229 return element_bitsize * 16;
6230 else if (strstr (name, "bool8") != NULL)
6231 return element_bitsize * 8;
6232 else if (strstr (name, "bool4") != NULL)
6233 return element_bitsize * 4;
6234 else if (strstr (name, "bool2") != NULL)
6235 return element_bitsize * 2;
6237 if (strstr (name, "mf8") != NULL)
6238 return element_bitsize * 8;
6239 else if (strstr (name, "mf4") != NULL)
6240 return element_bitsize * 4;
6241 else if (strstr (name, "mf2") != NULL)
6242 return element_bitsize * 2;
6244 return element_bitsize;
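/* E.g. vint32mf2_t has 32-bit elements and an "mf2" suffix, so it
   requires VLEN >= 32 * 2 == 64; vbool64_t has 1-bit elements and a
   "bool64" suffix, so it also requires VLEN >= 1 * 64 == 64.  */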
6247 static void
6248 riscv_validate_vector_type (const_tree type, const char *hint)
6250 gcc_assert (riscv_vector_type_p (type));
6252 if (!TARGET_VECTOR)
6254 error_at (input_location, "%s %qT requires the V ISA extension",
6255 hint, type);
6256 return;
6259 int element_bitsize = riscv_vector_element_bitsize (type);
6260 bool int_type_p = riscv_vector_int_type_p (type);
6262 if (int_type_p && element_bitsize == 64
6263 && !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
6265 error_at (input_location,
6266 "%s %qT requires the zve64x, zve64f, zve64d or v ISA extension",
6267 hint, type);
6268 return;
6271 bool float_type_p = riscv_vector_float_type_p (type);
6273 if (float_type_p && element_bitsize == 16
6274 && (!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags)
6275 && !TARGET_VECTOR_ELEN_BF_16_P (riscv_vector_elen_flags)))
6277 const char *name = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type)));
6278 if (strstr (name, "vfloat"))
6279 error_at (input_location,
6280 "%s %qT requires the zvfhmin or zvfh ISA extension",
6281 hint, type);
6282 return;
6285 if (float_type_p && element_bitsize == 32
6286 && !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
6288 error_at (input_location,
6289 "%s %qT requires the zve32f, zve64f, zve64d or v ISA extension",
6290 hint, type);
6291 return;
6294 if (float_type_p && element_bitsize == 64
6295 && !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
6297 error_at (input_location,
6298 "%s %qT requires the zve64d or v ISA extension", hint, type);
6299 return;
6302 int required_min_vlen = riscv_vector_required_min_vlen (type);
6304 if (TARGET_MIN_VLEN < required_min_vlen)
6306 error_at (
6307 input_location,
6308 "%s %qT requires the minimal vector length %qd but %qd is given",
6309 hint, type, required_min_vlen, TARGET_MIN_VLEN);
6310 return;
6314 /* Return true if a function with type FNTYPE returns its value in
6315 RISC-V V registers. */
6317 static bool
6318 riscv_return_value_is_vector_type_p (const_tree fntype)
6320 tree return_type = TREE_TYPE (fntype);
6322 if (riscv_vector_type_p (return_type))
6324 riscv_validate_vector_type (return_type, "return type");
6325 return true;
6327 else
6328 return false;
6331 /* Return true if a function with type FNTYPE takes arguments in
6332 RISC-V V registers. */
6334 static bool
6335 riscv_arguments_is_vector_type_p (const_tree fntype)
6337 for (tree chain = TYPE_ARG_TYPES (fntype); chain && chain != void_list_node;
6338 chain = TREE_CHAIN (chain))
6340 tree arg_type = TREE_VALUE (chain);
6341 if (riscv_vector_type_p (arg_type))
6343 riscv_validate_vector_type (arg_type, "argument type");
6344 return true;
6348 return false;
6351 /* Return true if FUNC is a riscv_vector_cc function.
6352 For more details please reference the below link.
6353 https://github.com/riscv-non-isa/riscv-c-api-doc/pull/67 */
6354 static bool
6355 riscv_vector_cc_function_p (const_tree fntype)
6357 tree attr = TYPE_ATTRIBUTES (fntype);
6358 bool vector_cc_p = lookup_attribute ("vector_cc", attr) != NULL_TREE
6359 || lookup_attribute ("riscv_vector_cc", attr) != NULL_TREE;
6361 if (vector_cc_p && !TARGET_VECTOR)
6362 error_at (input_location,
6363 "function attribute %qs requires the V ISA extension",
6364 "riscv_vector_cc");
6366 return vector_cc_p;
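/* A usage sketch of the attribute tested above:

       void f (vint32m1_t v) __attribute__ ((riscv_vector_cc));

   marks F as using the variant calling convention, i.e. the riscv_v_abi
   with callee-saved v1-v7 and v24-v31 defined earlier.  */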
6369 /* Implement TARGET_FNTYPE_ABI. */
6371 static const predefined_function_abi &
6372 riscv_fntype_abi (const_tree fntype)
6374 /* Implement the vector calling convention. For more details please
6375 reference the below link.
6376 https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/389 */
6377 if (riscv_return_value_is_vector_type_p (fntype)
6378 || riscv_arguments_is_vector_type_p (fntype)
6379 || riscv_vector_cc_function_p (fntype))
6380 return riscv_v_abi ();
6382 return default_function_abi;
6385 /* Return riscv calling convention of call_insn. */
6386 riscv_cc
6387 get_riscv_cc (const rtx use)
6389 gcc_assert (GET_CODE (use) == USE);
6390 rtx unspec = XEXP (use, 0);
6391 gcc_assert (GET_CODE (unspec) == UNSPEC
6392 && XINT (unspec, 1) == UNSPEC_CALLEE_CC);
6393 riscv_cc cc = (riscv_cc) INTVAL (XVECEXP (unspec, 0, 0));
6394 gcc_assert (cc < RISCV_CC_UNKNOWN);
6395 return cc;
6398 /* Implement TARGET_INSN_CALLEE_ABI. */
6400 const predefined_function_abi &
6401 riscv_insn_callee_abi (const rtx_insn *insn)
6403 rtx pat = PATTERN (insn);
6404 gcc_assert (GET_CODE (pat) == PARALLEL);
6405 riscv_cc cc = get_riscv_cc (XVECEXP (pat, 0, 1));
6406 return function_abis[cc];
6409 /* Handle an attribute requiring a FUNCTION_DECL;
6410 arguments as in struct attribute_spec.handler. */
6411 static tree
6412 riscv_handle_fndecl_attribute (tree *node, tree name,
6413 tree args ATTRIBUTE_UNUSED,
6414 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6416 if (TREE_CODE (*node) != FUNCTION_DECL)
6418 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6419 name);
6420 *no_add_attrs = true;
6423 return NULL_TREE;
6426 /* Verify type-based attributes. NODE is what the attribute is being
6427 applied to. NAME is the attribute name. ARGS are the attribute args.
6428 FLAGS gives info about the context. NO_ADD_ATTRS should be set to true if
6429 the attribute should be ignored. */
6431 static tree
6432 riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6433 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6435 /* Check for an argument. */
6436 if (is_attribute_p ("interrupt", name))
6438 if (args)
6440 tree cst = TREE_VALUE (args);
6441 const char *string;
6443 if (TREE_CODE (cst) != STRING_CST)
6445 warning (OPT_Wattributes,
6446 "%qE attribute requires a string argument",
6447 name);
6448 *no_add_attrs = true;
6449 return NULL_TREE;
6452 string = TREE_STRING_POINTER (cst);
6453 if (strcmp (string, "user") && strcmp (string, "supervisor")
6454 && strcmp (string, "machine"))
6456 warning (OPT_Wattributes,
6457 "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
6458 "or %<\"machine\"%>", name);
6459 *no_add_attrs = true;
6464 return NULL_TREE;
6467 static tree
6468 riscv_handle_rvv_vector_bits_attribute (tree *node, tree name, tree args,
6469 ATTRIBUTE_UNUSED int flags,
6470 bool *no_add_attrs)
6472 if (!is_attribute_p ("riscv_rvv_vector_bits", name))
6473 return NULL_TREE;
6475 *no_add_attrs = true;
6477 if (rvv_vector_bits != RVV_VECTOR_BITS_ZVL)
6479 error (
6480 "%qs is only supported when %<-mrvv-vector-bits=zvl%> is specified",
6481 "riscv_rvv_vector_bits");
6482 return NULL_TREE;
6485 tree type = *node;
6487 if (!VECTOR_TYPE_P (type) || !riscv_vector::builtin_type_p (type))
6489 error ("%qs applied to non-RVV type %qT", "riscv_rvv_vector_bits", type);
6490 return NULL_TREE;
6493 tree size = TREE_VALUE (args);
6495 if (TREE_CODE (size) != INTEGER_CST)
6497 error ("%qs requires an integer constant", "riscv_rvv_vector_bits");
6498 return NULL_TREE;
6501 unsigned HOST_WIDE_INT args_in_bits = tree_to_uhwi (size);
6502 unsigned HOST_WIDE_INT type_mode_bits
6503 = GET_MODE_PRECISION (TYPE_MODE (type)).to_constant ();
6505 if (args_in_bits != type_mode_bits)
6507 error ("invalid RVV vector size %qd, "
6508 "expected size is %qd based on LMUL of type and %qs",
6509 (int)args_in_bits, (int)type_mode_bits, "-mrvv-vector-bits=zvl");
6510 return NULL_TREE;
6513 type = build_distinct_type_copy (type);
6514 TYPE_ATTRIBUTES (type)
6515 = remove_attribute ("RVV sizeless type",
6516 copy_list (TYPE_ATTRIBUTES (type)));
6518 /* Operations like ALU/compare on vbool*_t are not well defined;
6519 continue to treat vbool*_t as indivisible. */
6520 if (!VECTOR_BOOLEAN_TYPE_P (type))
6521 TYPE_INDIVISIBLE_P (type) = 0;
6523 *node = type;
6525 return NULL_TREE;
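/* Typical usage of the attribute handled above (a sketch; with
   -mrvv-vector-bits=zvl and a VLEN of 128, an LMUL=1 type is 128 bits):

       typedef vint32m1_t fixed_vint32m1_t
	 __attribute__ ((riscv_rvv_vector_bits (128)));

   which produces a fixed-size, non-indivisible copy of the RVV type.  */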
6528 /* Return true if function TYPE is an interrupt function. */
6529 static bool
6530 riscv_interrupt_type_p (tree type)
6532 return lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type)) != NULL;
6535 /* Return true if FUNC is a naked function. */
6536 static bool
6537 riscv_naked_function_p (tree func)
6539 tree func_decl = func;
6540 if (func == NULL_TREE)
6541 func_decl = current_function_decl;
6542 return NULL_TREE != lookup_attribute ("naked", DECL_ATTRIBUTES (func_decl));
6545 /* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS. */
6546 static bool
6547 riscv_allocate_stack_slots_for_args ()
6549 /* Naked functions should not allocate stack slots for arguments. */
6550 return !riscv_naked_function_p (current_function_decl);
6553 /* Implement TARGET_WARN_FUNC_RETURN. */
6554 static bool
6555 riscv_warn_func_return (tree decl)
6557 /* Naked functions are implemented entirely in assembly, including the
6558 return sequence, so suppress warnings about this. */
6559 return !riscv_naked_function_p (decl);
6562 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6564 static void
6565 riscv_va_start (tree valist, rtx nextarg)
6567 nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
6568 std_expand_builtin_va_start (valist, nextarg);
6571 /* Make ADDR suitable for use as a call or sibcall target. */
6573 rtx
6574 riscv_legitimize_call_address (rtx addr)
6576 if (!call_insn_operand (addr, VOIDmode))
6578 rtx reg = RISCV_CALL_ADDRESS_TEMP (Pmode);
6579 riscv_emit_move (reg, addr);
6580 return reg;
6582 return addr;
6585 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM,
6586 to FILE. HI_RELOC indicates a high-part reloc. */
6588 static void
6589 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
6591 const char *reloc;
6593 switch (riscv_classify_symbolic_expression (op))
6595 case SYMBOL_ABSOLUTE:
6596 reloc = hi_reloc ? "%hi" : "%lo";
6597 break;
6599 case SYMBOL_PCREL:
6600 reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
6601 break;
6603 case SYMBOL_TLS_LE:
6604 reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
6605 break;
6607 default:
6608 output_operand_lossage ("invalid use of '%%%c'", hi_reloc ? 'h' : 'R');
6609 return;
6612 fprintf (file, "%s(", reloc);
6613 output_addr_const (file, riscv_strip_unspec_address (op));
6614 fputc (')', file);
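/* E.g. for a SYMBOL_ABSOLUTE operand this prints the %hi/%lo pair used
   in the classic two-instruction address materialization

       lui   a0, %hi(sym)
       addi  a0, a0, %lo(sym)

   and for SYMBOL_PCREL the %pcrel_hi/%pcrel_lo forms used with auipc.  */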
6617 /* Return the memory model that encapsulates both given models. */
6619 enum memmodel
6620 riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
6622 model1 = memmodel_base (model1);
6623 model2 = memmodel_base (model2);
6625 enum memmodel weaker = model1 <= model2 ? model1: model2;
6626 enum memmodel stronger = model1 > model2 ? model1: model2;
6628 switch (stronger)
6630 case MEMMODEL_SEQ_CST:
6631 case MEMMODEL_ACQ_REL:
6632 return stronger;
6633 case MEMMODEL_RELEASE:
6634 if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
6635 return MEMMODEL_ACQ_REL;
6636 else
6637 return stronger;
6638 case MEMMODEL_ACQUIRE:
6639 case MEMMODEL_CONSUME:
6640 case MEMMODEL_RELAXED:
6641 return stronger;
6642 default:
6643 gcc_unreachable ();
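/* E.g. the union of MEMMODEL_ACQUIRE and MEMMODEL_RELEASE is
   MEMMODEL_ACQ_REL, while the union of MEMMODEL_RELAXED and
   MEMMODEL_ACQUIRE is simply MEMMODEL_ACQUIRE: the result is never
   weaker than either input.  */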
6647 /* Return true if the .AQ suffix should be added to an AMO to implement the
6648 acquire portion of memory model MODEL. */
6650 static bool
6651 riscv_memmodel_needs_amo_acquire (enum memmodel model)
6653 /* ZTSO amo mappings require no annotations. */
6654 if (TARGET_ZTSO)
6655 return false;
6657 switch (model)
6659 case MEMMODEL_ACQ_REL:
6660 case MEMMODEL_SEQ_CST:
6661 case MEMMODEL_ACQUIRE:
6662 case MEMMODEL_CONSUME:
6663 return true;
6665 case MEMMODEL_RELEASE:
6666 case MEMMODEL_RELAXED:
6667 return false;
6669 default:
6670 gcc_unreachable ();
6674 /* Return true if the .RL suffix should be added to an AMO to implement the
6675 release portion of memory model MODEL. */
6677 static bool
6678 riscv_memmodel_needs_amo_release (enum memmodel model)
6680 /* ZTSO amo mappings require no annotations. */
6681 if (TARGET_ZTSO)
6682 return false;
6684 switch (model)
6686 case MEMMODEL_ACQ_REL:
6687 case MEMMODEL_SEQ_CST:
6688 case MEMMODEL_RELEASE:
6689 return true;
6691 case MEMMODEL_ACQUIRE:
6692 case MEMMODEL_CONSUME:
6693 case MEMMODEL_RELAXED:
6694 return false;
6696 default:
6697 gcc_unreachable ();
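/* Together the two predicates above select the AMO annotation: e.g. a
   seq_cst __atomic_fetch_add needs both and becomes "amoadd.w.aqrl",
   a relaxed one needs neither and stays "amoadd.w", and under Ztso no
   suffix is ever added.  */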
6701 /* Get REGNO alignment of vector mode.
6702 The alignment = LMUL when the LMUL >= 1.
6703 Otherwise, alignment = 1. */
6704 int
6705 riscv_get_v_regno_alignment (machine_mode mode)
6707 /* 3.3.2. For LMUL = 2, 4, 8, register numbers must be multiples of 2, 4, 8,
6708 but for mask vector registers the register number can be any number. */
6709 int lmul = 1;
6710 machine_mode rvv_mode = mode;
6711 if (riscv_v_ext_vls_mode_p (rvv_mode))
6713 int size = GET_MODE_BITSIZE (rvv_mode).to_constant ();
6714 if (size < TARGET_MIN_VLEN)
6715 return 1;
6716 else
6717 return size / TARGET_MIN_VLEN;
6719 if (riscv_v_ext_tuple_mode_p (rvv_mode))
6720 rvv_mode = riscv_vector::get_subpart_mode (rvv_mode);
6721 poly_int64 size = GET_MODE_SIZE (rvv_mode);
6722 if (known_gt (size, UNITS_PER_V_REG))
6723 lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
6724 return lmul;
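/* E.g. an LMUL=4 mode occupies four vector registers and gets alignment
   4, so only v0, v4, v8, ... may start such a group, while fractional
   LMUL and mask modes get alignment 1 and may start anywhere.  */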
6727 /* Define ASM_OUTPUT_OPCODE to do anything special before
6728 emitting an opcode. */
6729 const char *
6730 riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
6732 if (TARGET_XTHEADVECTOR)
6733 return th_asm_output_opcode (asm_out_file, p);
6735 return p;
6738 /* Implement TARGET_PRINT_OPERAND. The RISCV-specific operand codes are:
6740 'h' Print the high-part relocation associated with OP, after stripping
6741 any outermost HIGH.
6742 'R' Print the low-part relocation associated with OP.
6743 'C' Print the integer branch condition for comparison OP.
6744 'N' Print the inverse of the integer branch condition for comparison OP.
6745 'A' Print the atomic operation suffix for memory model OP.
6746 'I' Print the LR suffix for memory model OP.
6747 'J' Print the SC suffix for memory model OP.
6748 'L' Print a non-temporal locality hint instruction.
6749 'z' Print x0 if OP is zero, otherwise print OP normally.
6750 'i' Print i if the operand is not a register.
6751 'S' Print shift-index of single-bit mask OP.
6752 'T' Print shift-index of inverted single-bit mask OP.
6753 '~' Print w if TARGET_64BIT is true; otherwise not print anything.
6755 Please keep this list and the list in riscv.md in sync. */
6757 static void
6758 riscv_print_operand (FILE *file, rtx op, int letter)
6760 /* `~` does not take an operand, so op will be null.
6761 Check for that before accessing op. */
6763 if (letter == '~')
6765 if (TARGET_64BIT)
6766 fputc('w', file);
6767 return;
6769 machine_mode mode = GET_MODE (op);
6770 enum rtx_code code = GET_CODE (op);
6772 switch (letter)
6774 case 'o': {
6775 /* Print 'OP' variant for RVV instructions.
6776 1. If the operand is VECTOR REG, we print 'v'(vnsrl.wv).
6777 2. If the operand is CONST_INT/CONST_VECTOR, we print 'i'(vnsrl.wi).
6778 3. If the operand is SCALAR REG, we print 'x'(vnsrl.wx). */
6779 if (riscv_v_ext_mode_p (mode))
6781 if (REG_P (op))
6782 asm_fprintf (file, "v");
6783 else if (CONST_VECTOR_P (op))
6784 asm_fprintf (file, "i");
6785 else
6786 output_operand_lossage ("invalid vector operand");
6788 else
6790 if (CONST_INT_P (op))
6791 asm_fprintf (file, "i");
6792 else
6793 asm_fprintf (file, "x");
6795 break;
6797 case 'v': {
6798 rtx elt;
6800 if (REG_P (op))
6801 asm_fprintf (file, "%s", reg_names[REGNO (op)]);
6802 else
6804 if (!const_vec_duplicate_p (op, &elt))
6805 output_operand_lossage ("invalid vector constant");
6806 else if (satisfies_constraint_Wc0 (op))
6807 asm_fprintf (file, "0");
6808 else if (satisfies_constraint_vi (op)
6809 || satisfies_constraint_vj (op)
6810 || satisfies_constraint_vk (op))
6811 asm_fprintf (file, "%wd", INTVAL (elt));
6812 else
6813 output_operand_lossage ("invalid vector constant");
6815 break;
6817 case 'V': {
6818 rtx elt;
6819 if (!const_vec_duplicate_p (op, &elt))
6820 output_operand_lossage ("invalid vector constant");
6821 else if (satisfies_constraint_vj (op))
6822 asm_fprintf (file, "%wd", -INTVAL (elt));
6823 else
6824 output_operand_lossage ("invalid vector constant");
6825 break;
6827 case 'm': {
6828 if (riscv_v_ext_mode_p (mode))
6830 /* Calculate lmul according to mode and print the value. */
6831 int lmul = riscv_get_v_regno_alignment (mode);
6832 asm_fprintf (file, "%d", lmul);
6834 else if (code == CONST_INT)
6836 /* If it is a const_int value, it denotes the VLMUL field enum. */
6837 unsigned int vlmul = UINTVAL (op);
6838 switch (vlmul)
6840 case riscv_vector::LMUL_1:
6841 asm_fprintf (file, "%s", "m1");
6842 break;
6843 case riscv_vector::LMUL_2:
6844 asm_fprintf (file, "%s", "m2");
6845 break;
6846 case riscv_vector::LMUL_4:
6847 asm_fprintf (file, "%s", "m4");
6848 break;
6849 case riscv_vector::LMUL_8:
6850 asm_fprintf (file, "%s", "m8");
6851 break;
6852 case riscv_vector::LMUL_F8:
6853 asm_fprintf (file, "%s", "mf8");
6854 break;
6855 case riscv_vector::LMUL_F4:
6856 asm_fprintf (file, "%s", "mf4");
6857 break;
6858 case riscv_vector::LMUL_F2:
6859 asm_fprintf (file, "%s", "mf2");
6860 break;
6861 default:
6862 gcc_unreachable ();
6865 else
6866 output_operand_lossage ("invalid vector constant");
6867 break;
6869 case 'p': {
6870 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
6872 /* Print for RVV mask operand.
6873 If op is reg, print ",v0.t".
6874 Otherwise, don't print anything. */
6875 if (code == REG)
6876 fprintf (file, ",%s.t", reg_names[REGNO (op)]);
6878 else if (code == CONST_INT)
6880 /* Tail && Mask policy. */
6881 asm_fprintf (file, "%s", IS_AGNOSTIC (UINTVAL (op)) ? "a" : "u");
6883 else
6884 output_operand_lossage ("invalid vector constant");
6885 break;
6887 case 'h':
6888 if (code == HIGH)
6889 op = XEXP (op, 0);
6890 riscv_print_operand_reloc (file, op, true);
6891 break;
6893 case 'R':
6894 riscv_print_operand_reloc (file, op, false);
6895 break;
6897 case 'C':
6898 /* The RTL names match the instruction names. */
6899 fputs (GET_RTX_NAME (code), file);
6900 break;
6902 case 'N':
6903 /* The RTL names match the instruction names. */
6904 fputs (GET_RTX_NAME (reverse_condition (code)), file);
6905 break;
6907 case 'A': {
6908 const enum memmodel model = memmodel_base (INTVAL (op));
6909 if (riscv_memmodel_needs_amo_acquire (model)
6910 && riscv_memmodel_needs_amo_release (model))
6911 fputs (".aqrl", file);
6912 else if (riscv_memmodel_needs_amo_acquire (model))
6913 fputs (".aq", file);
6914 else if (riscv_memmodel_needs_amo_release (model))
6915 fputs (".rl", file);
6916 break;
6919 case 'I': {
6920 const enum memmodel model = memmodel_base (INTVAL (op));
6921 if (TARGET_ZTSO && model != MEMMODEL_SEQ_CST)
6922 /* LR ops only have an annotation for SEQ_CST in the Ztso mapping. */
6923 break;
6924 else if (model == MEMMODEL_SEQ_CST)
6925 fputs (".aqrl", file);
6926 else if (riscv_memmodel_needs_amo_acquire (model))
6927 fputs (".aq", file);
6928 break;
6931 case 'J': {
6932 const enum memmodel model = memmodel_base (INTVAL (op));
6933 if (TARGET_ZTSO && model == MEMMODEL_SEQ_CST)
6934 /* SC ops only have an annotation for SEQ_CST in the Ztso mapping. */
6935 fputs (".rl", file);
6936 else if (TARGET_ZTSO)
6937 break;
6938 else if (riscv_memmodel_needs_amo_release (model))
6939 fputs (".rl", file);
6940 break;
6943 case 'L':
6945 const char *ntl_hint = NULL;
6946 switch (INTVAL (op))
6948 case 0:
6949 ntl_hint = "ntl.all";
6950 break;
6951 case 1:
6952 ntl_hint = "ntl.pall";
6953 break;
6954 case 2:
6955 ntl_hint = "ntl.p1";
6956 break;
6959 if (ntl_hint)
6960 asm_fprintf (file, "%s\n\t", ntl_hint);
6961 break;
6964 case 'i':
6965 if (code != REG)
6966 fputs ("i", file);
6967 break;
6969 case 'B':
6970 fputs (GET_RTX_NAME (code), file);
6971 break;
6973 case 'S':
6975 rtx newop = GEN_INT (ctz_hwi (INTVAL (op)));
6976 output_addr_const (file, newop);
6977 break;
6979 case 'T':
6981 rtx newop = GEN_INT (ctz_hwi (~INTVAL (op)));
6982 output_addr_const (file, newop);
6983 break;
6985 case 'X':
6987 int ival = INTVAL (op) + 1;
6988 rtx newop = GEN_INT (ctz_hwi (ival) + 1);
6989 output_addr_const (file, newop);
6990 break;
6992 case 'Y':
6994 unsigned int imm = (UINTVAL (op) & 63);
6995 gcc_assert (imm <= 63);
6996 rtx newop = GEN_INT (imm);
6997 output_addr_const (file, newop);
6998 break;
7000 default:
7001 switch (code)
7003 case REG:
7004 if (letter && letter != 'z')
7005 output_operand_lossage ("invalid use of '%%%c'", letter);
7006 fprintf (file, "%s", reg_names[REGNO (op)]);
7007 break;
7009 case MEM:
7010 if (letter && letter != 'z')
7011 output_operand_lossage ("invalid use of '%%%c'", letter);
7012 else
7013 output_address (mode, XEXP (op, 0));
7014 break;
7016 case CONST_DOUBLE:
7018 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
7020 fputs (reg_names[GP_REG_FIRST], file);
7021 break;
7024 int fli_index = riscv_float_const_rtx_index_for_fli (op);
7025 if (fli_index == -1 || fli_index > 31)
7027 output_operand_lossage ("invalid use of '%%%c'", letter);
7028 break;
7030 asm_fprintf (file, "%s", fli_value_print[fli_index]);
7031 break;
7034 default:
7035 if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
7036 fputs (reg_names[GP_REG_FIRST], file);
7037 else if (letter && letter != 'z')
7038 output_operand_lossage ("invalid use of '%%%c'", letter);
7039 else
7040 output_addr_const (file, riscv_strip_unspec_address (op));
7041 break;
7046 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
7047 static bool
7048 riscv_print_operand_punct_valid_p (unsigned char code)
7050 return (code == '~');
7053 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
7055 static void
7056 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7058 struct riscv_address_info addr;
7060 if (th_print_operand_address (file, mode, x))
7061 return;
7063 if (riscv_classify_address (&addr, x, word_mode, true))
7064 switch (addr.type)
7066 case ADDRESS_REG:
7067 output_addr_const (file, riscv_strip_unspec_address (addr.offset));
7068 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
7069 return;
7071 case ADDRESS_LO_SUM:
7072 riscv_print_operand_reloc (file, addr.offset, false);
7073 fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
7074 return;
7076 case ADDRESS_CONST_INT:
7077 output_addr_const (file, x);
7078 fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
7079 return;
7081 case ADDRESS_SYMBOLIC:
7082 output_addr_const (file, riscv_strip_unspec_address (x));
7083 return;
7085 default:
7086 gcc_unreachable ();
7089 gcc_unreachable ();
7092 static bool
7093 riscv_size_ok_for_small_data_p (int size)
7095 return g_switch_value && IN_RANGE (size, 1, g_switch_value);
7098 /* Return true if EXP should be placed in the small data section. */
7100 static bool
7101 riscv_in_small_data_p (const_tree x)
7103 /* default_use_anchors_for_symbol_p doesn't gather small data, so small data
7104 cannot use the anchor symbol to address nearby objects. In the large model,
7105 we get better results from the anchor optimization, so avoid small data. */
7106 if (riscv_cmodel == CM_LARGE)
7107 return false;
7109 if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
7110 return false;
7112 if (VAR_P (x) && DECL_SECTION_NAME (x))
7114 const char *sec = DECL_SECTION_NAME (x);
7115 return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
7118 return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
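/* E.g. with the default -msmall-data-limit=8 (which sets
   g_switch_value), an 8-byte global such as "long counter;" lands in
   .sdata and can be addressed gp-relative, while a 16-byte object stays
   in .data.  */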
7121 /* Switch to the appropriate section for output of DECL. */
7123 static section *
7124 riscv_select_section (tree decl, int reloc,
7125 unsigned HOST_WIDE_INT align)
7127 switch (categorize_decl_for_section (decl, reloc))
7129 case SECCAT_SRODATA:
7130 return get_named_section (decl, ".srodata", reloc);
7132 default:
7133 return default_elf_select_section (decl, reloc, align);
7137 /* Switch to the appropriate section for output of DECL. */
7139 static void
7140 riscv_unique_section (tree decl, int reloc)
7142 const char *prefix = NULL;
7143 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
7145 switch (categorize_decl_for_section (decl, reloc))
7147 case SECCAT_SRODATA:
7148 prefix = one_only ? ".sr" : ".srodata";
7149 break;
7151 default:
7152 break;
7154 if (prefix)
7156 const char *name, *linkonce;
7157 char *string;
7159 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7160 name = targetm.strip_name_encoding (name);
7162 /* If we're using one_only, then there needs to be a .gnu.linkonce
7163 prefix to the section name. */
7164 linkonce = one_only ? ".gnu.linkonce" : "";
7166 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
7168 set_decl_section_name (decl, string);
7169 return;
7171 default_unique_section (decl, reloc);
7174 /* Constant pools are per-function when in large code model. */
7176 static inline bool
7177 riscv_can_use_per_function_literal_pools_p (void)
7179 return riscv_cmodel == CM_LARGE;
7182 static bool
7183 riscv_use_blocks_for_constant_p (machine_mode, const_rtx)
7185 /* We can't use blocks for constants when we're using a per-function
7186 constant pool. */
7187 return !riscv_can_use_per_function_literal_pools_p ();
7190 /* Return a section for X, handling small data. */
7192 static section *
7193 riscv_elf_select_rtx_section (machine_mode mode, rtx x,
7194 unsigned HOST_WIDE_INT align)
7196 /* The literal pool stays with the function. */
7197 if (riscv_can_use_per_function_literal_pools_p ())
7198 return function_section (current_function_decl);
7200 section *s = default_elf_select_rtx_section (mode, x, align);
7202 if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode).to_constant ()))
7204 if (startswith (s->named.name, ".rodata.cst"))
7206 /* Rename .rodata.cst* to .srodata.cst*. */
7207 char *name = (char *) alloca (strlen (s->named.name) + 2);
7208 sprintf (name, ".s%s", s->named.name + 1);
7209 return get_section (name, s->named.common.flags, NULL);
7212 if (s == data_section)
7213 return sdata_section;
7216 return s;
7219 /* Make the last instruction frame-related and note that it performs
7220 the operation described by FRAME_PATTERN. */
7222 static void
7223 riscv_set_frame_expr (rtx frame_pattern)
7225 rtx insn;
7227 insn = get_last_insn ();
7228 RTX_FRAME_RELATED_P (insn) = 1;
7229 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
7230 frame_pattern,
7231 REG_NOTES (insn));
7234 /* Return a frame-related rtx that stores REG at MEM.
7235 REG must be a single register. */
7237 static rtx
7238 riscv_frame_set (rtx mem, rtx reg)
7240 rtx set = gen_rtx_SET (mem, reg);
7241 RTX_FRAME_RELATED_P (set) = 1;
7242 return set;
7245 /* Returns true if the current function might contain a far jump. */
7247 static bool
7248 riscv_far_jump_used_p ()
7250 size_t func_size = 0;
7252 if (cfun->machine->far_jump_used)
7253 return true;
7255 /* We can't change far_jump_used during or after reload, as there is
7256 no chance to change stack frame layout. So we must rely on the
7257 conservative heuristic below having done the right thing. */
7258 if (reload_in_progress || reload_completed)
7259 return false;
7261 /* Estimate the function length. */
7262 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
7263 func_size += get_attr_length (insn);
7265 /* Conservatively determine whether some jump might exceed 1 MiB
7266 displacement. */
7267 if (func_size * 2 >= 0x100000)
7268 cfun->machine->far_jump_used = true;
7270 return cfun->machine->far_jump_used;
7273 /* Return true if the current function must save the incoming return
7274 address. */
7276 static bool
7277 riscv_save_return_addr_reg_p (void)
7279 /* The $ra register is call-clobbered: if this is not a leaf function,
7280 save it. */
7281 if (!crtl->is_leaf)
7282 return true;
7284 /* We need to save the incoming return address if __builtin_eh_return
7285 is being used to set a different return address. */
7286 if (crtl->calls_eh_return)
7287 return true;
7289 /* Far jumps/branches use $ra as a temporary to set up the target jump
7290 location (clobbering the incoming return address). */
7291 if (riscv_far_jump_used_p ())
7292 return true;
7294 /* We need to save it if anyone has used it. */
7295 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
7296 return true;
7298 /* A leaf function need not save ra when the frame pointer is disabled
7299 by option, regardless of the omit-leaf-frame-pointer setting. */
7300 if (frame_pointer_needed && crtl->is_leaf
7301 && !TARGET_OMIT_LEAF_FRAME_POINTER)
7302 return true;
7304 return false;
7307 /* Return true if the current function must save register REGNO. */
7309 static bool
7310 riscv_save_reg_p (unsigned int regno)
7312 bool call_saved = !global_regs[regno] && !call_used_or_fixed_reg_p (regno);
7313 bool might_clobber = crtl->saves_all_registers
7314 || df_regs_ever_live_p (regno);
7316 if (call_saved && might_clobber)
7317 return true;
7319 /* Save callee-saved V registers. */
7320 if (V_REG_P (regno) && !crtl->abi->clobbers_full_reg_p (regno)
7321 && might_clobber)
7322 return true;
7324 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
7325 return true;
7327 if (regno == RETURN_ADDR_REGNUM && riscv_save_return_addr_reg_p ())
7328 return true;
7330 /* If this is an interrupt handler, then we must save extra registers. */
7331 if (cfun->machine->interrupt_handler_p)
7333 /* The zero register is always zero. */
7334 if (regno == GP_REG_FIRST)
7335 return false;
7337 /* The function will return the stack pointer to its original value. */
7338 if (regno == STACK_POINTER_REGNUM)
7339 return false;
7341 /* By convention, we assume that gp and tp are safe. */
7342 if (regno == GP_REGNUM || regno == THREAD_POINTER_REGNUM)
7343 return false;
7345 /* We must save every register used in this function. If this is not a
7346 leaf function, then we must save all temporary registers. */
7347 if (df_regs_ever_live_p (regno)
7348 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
7349 return true;
7352 return false;
7355 /* Return TRUE if Zcmp push and pop insns should be
7356 avoided. FALSE otherwise.
7357 Only use multi push & pop if all masked GPRs can be covered,
7358 stack access is SP-based,
7359 GPRs are at the top of the stack frame,
7360 and there are no stack-allocation conflicts with other features. */
7361 static bool
7362 riscv_avoid_multi_push (const struct riscv_frame_info *frame)
7364 if (!TARGET_ZCMP || crtl->calls_eh_return || frame_pointer_needed
7365 || cfun->machine->interrupt_handler_p || cfun->machine->varargs_size != 0
7366 || crtl->args.pretend_args_size != 0
7367 || (use_shrink_wrapping_separate ()
7368 && !riscv_avoid_shrink_wrapping_separate ())
7369 || (frame->mask & ~MULTI_PUSH_GPR_MASK))
7370 return true;
7372 return false;
7375 /* Determine whether to use multi push insn. */
7376 static bool
7377 riscv_use_multi_push (const struct riscv_frame_info *frame)
7379 if (riscv_avoid_multi_push (frame))
7380 return false;
7382 return (frame->multi_push_adj_base != 0);
7385 /* Return TRUE if a libcall to save/restore GPRs should be
7386 avoided. FALSE otherwise. */
7387 static bool
7388 riscv_avoid_save_libcall (void)
7390 if (!TARGET_SAVE_RESTORE
7391 || crtl->calls_eh_return
7392 || frame_pointer_needed
7393 || cfun->machine->interrupt_handler_p
7394 || cfun->machine->varargs_size != 0
7395 || crtl->args.pretend_args_size != 0)
7396 return true;
7398 return false;
7401 /* Determine whether to call GPR save/restore routines. */
7402 static bool
7403 riscv_use_save_libcall (const struct riscv_frame_info *frame)
7405 if (riscv_avoid_save_libcall ())
7406 return false;
7408 return frame->save_libcall_adjustment != 0;
7411 /* Determine which GPR save/restore routine to call. */
7413 static unsigned
7414 riscv_save_libcall_count (unsigned mask)
7416 for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
7417 if (BITSET_P (mask, n))
7418 return CALLEE_SAVED_REG_NUMBER (n) + 1;
7419 abort ();
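/* E.g. if the highest saved register in MASK is s2, this returns 3,
   which selects libgcc's __riscv_save_3/__riscv_restore_3 entry points
   covering ra and s0-s2 (an illustrative reading of the numbering).  */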
7422 /* Calculate the number of s-registers in multi push and pop.
7423 Note that {s0-s10} is not valid in Zcmp; use {s0-s11} instead. */
7424 static unsigned
7425 riscv_multi_push_sregs_count (unsigned mask)
7427 unsigned num = riscv_save_libcall_count (mask);
7428 return (num == ZCMP_INVALID_S0S10_SREGS_COUNTS) ? ZCMP_S0S11_SREGS_COUNTS
7429 : num;
7432 /* Calculate the number of regs (ra, s0-sx) in multi push and pop. */
7433 static unsigned
7434 riscv_multi_push_regs_count (unsigned mask)
7436 /* 1 is for ra */
7437 return riscv_multi_push_sregs_count (mask) + 1;
7440 /* Handle 16-byte alignment for poly_int. */
7441 static poly_int64
7442 riscv_16bytes_align (poly_int64 value)
7444 return aligned_upper_bound (value, 16);
7447 static HOST_WIDE_INT
7448 riscv_16bytes_align (HOST_WIDE_INT value)
7450 return ROUND_UP (value, 16);
7453 /* Handle stack alignment for poly_int. */
7454 static poly_int64
7455 riscv_stack_align (poly_int64 value)
7457 return aligned_upper_bound (value, PREFERRED_STACK_BOUNDARY / 8);
7460 static HOST_WIDE_INT
7461 riscv_stack_align (HOST_WIDE_INT value)
7463 return RISCV_STACK_ALIGN (value);
7466 /* Populate the current function's riscv_frame_info structure.
7468 RISC-V stack frames grow downward. High addresses are at the top.
7470 +-------------------------------+
7472 | incoming stack arguments |
7474 +-------------------------------+ <-- incoming stack pointer
7476 | callee-allocated save area |
7477 | for arguments that are |
7478 | split between registers and |
7479 | the stack |
7481 +-------------------------------+ <-- arg_pointer_rtx
7483 | callee-allocated save area |
7484 | for register varargs |
7486 +-------------------------------+ <-- hard_frame_pointer_rtx;
7487 | | stack_pointer_rtx + gp_sp_offset
7488 | GPR save area | + UNITS_PER_WORD
7490 +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
7491 | | + UNITS_PER_FP_REG
7492 | FPR save area |
7494 +-------------------------------+ <-- stack_pointer_rtx
7495 | | + v_sp_offset_top
7496 | Vector Registers save area |
7498 | ----------------------------- | <-- stack_pointer_rtx
7499 | padding | + v_sp_offset_bottom
7500 +-------------------------------+ <-- frame_pointer_rtx (virtual)
7502 | local variables |
7504 P +-------------------------------+
7506 | outgoing stack arguments |
7508 +-------------------------------+ <-- stack_pointer_rtx
7510 Dynamic stack allocations such as alloca insert data at point P.
7511 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
7512 hard_frame_pointer_rtx unchanged. */
7514 static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
7516 static void
7517 riscv_compute_frame_info (void)
7519 struct riscv_frame_info *frame;
7520 poly_int64 offset;
7521 bool interrupt_save_prologue_temp = false;
7522 unsigned int regno, i, num_x_saved = 0, num_f_saved = 0, x_save_size = 0;
7523 unsigned int num_v_saved = 0;
7525 frame = &cfun->machine->frame;
7527 /* Adjust the outgoing arguments size if required. Keep it in sync with what
7528 the mid-end is doing. */
7529 crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
7531 /* In an interrupt function, there are two cases in which t0 needs to be used:
7532 1. If we have a large frame, then we need to save/restore t0. We check for
7533 this before clearing the frame struct.
7534 2. Some CSRs need to be saved and restored in the frame. */
7535 if (cfun->machine->interrupt_handler_p)
7537 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
7538 if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))
7539 || (TARGET_HARD_FLOAT || TARGET_ZFINX))
7540 interrupt_save_prologue_temp = true;
7543 frame->reset();
7545 if (!cfun->machine->naked_p)
7547 /* Find out which GPRs we need to save. */
7548 for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
7549 if (riscv_save_reg_p (regno)
7550 || (interrupt_save_prologue_temp
7551 && (regno == RISCV_PROLOGUE_TEMP_REGNUM)))
7552 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
7554 /* If this function calls eh_return, we must also save and restore the
7555 EH data registers. */
7556 if (crtl->calls_eh_return)
7557 for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7558 frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
7560 /* Find out which FPRs we need to save. This loop must iterate over
7561 the same space as its companion in riscv_for_each_saved_reg. */
7562 if (TARGET_HARD_FLOAT)
7563 for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7564 if (riscv_save_reg_p (regno))
7565 frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
7567 /* Find out which V registers we need to save. */
7568 if (TARGET_VECTOR)
7569 for (regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
7570 if (riscv_save_reg_p (regno))
7572 frame->vmask |= 1 << (regno - V_REG_FIRST);
7573 num_v_saved++;
7577 if (frame->mask)
7579 x_save_size = riscv_stack_align (num_x_saved * UNITS_PER_WORD);
7581 /* 1 is for ra */
7582 unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
7583 /* Only use save/restore routines if they don't alter the stack size. */
7584 if (riscv_stack_align (num_save_restore * UNITS_PER_WORD) == x_save_size
7585 && !riscv_avoid_save_libcall ())
7587 /* The libcall saves/restores 3 registers at once, so we need to
7588 allocate 12 bytes for the callee-saved registers. */
7589 if (TARGET_RVE)
7590 x_save_size = 3 * UNITS_PER_WORD;
7592 frame->save_libcall_adjustment = x_save_size;
7595 if (!riscv_avoid_multi_push (frame))
7597 /* num(ra, s0-sx) */
7598 unsigned num_multi_push = riscv_multi_push_regs_count (frame->mask);
7599 x_save_size = riscv_stack_align (num_multi_push * UNITS_PER_WORD);
7600 frame->multi_push_adj_base = riscv_16bytes_align (x_save_size);
7604 /* In an interrupt function, we need extra space for the initial saves of CSRs. */
7605 if (cfun->machine->interrupt_handler_p
7606 && ((TARGET_HARD_FLOAT && frame->fmask)
7607 || (TARGET_ZFINX
7608 /* Except for RISCV_PROLOGUE_TEMP_REGNUM. */
7609 && (frame->mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7610 /* Save and restore FCSR. */
7611 /* TODO: When P or V extensions support interrupts, some of their CSRs
7612 may also need to be saved and restored. */
7613 x_save_size += riscv_stack_align (1 * UNITS_PER_WORD);
7615 /* At the bottom of the frame are any outgoing stack arguments. */
7616 offset = riscv_stack_align (crtl->outgoing_args_size);
7617 /* Next are local stack variables. */
7618 offset += riscv_stack_align (get_frame_size ());
7619 /* The virtual frame pointer points above the local variables. */
7620 frame->frame_pointer_offset = offset;
7621 /* Next are the callee-saved VRs. */
7622 if (frame->vmask)
7623 offset += riscv_stack_align (num_v_saved * UNITS_PER_V_REG);
7624 frame->v_sp_offset_top = offset;
7625 frame->v_sp_offset_bottom
7626 = frame->v_sp_offset_top - num_v_saved * UNITS_PER_V_REG;
7627 /* Next are the callee-saved FPRs. */
7628 if (frame->fmask)
7629 offset += riscv_stack_align (num_f_saved * UNITS_PER_FP_REG);
7630 frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
7631 /* Next are the callee-saved GPRs. */
7632 if (frame->mask)
7634 offset += x_save_size;
7635 /* Align to 16 bytes and add padding to the GPR part to honor
7636 both the stack alignment and the Zcmp push/pop size alignment. */
7637 if (riscv_use_multi_push (frame)
7638 && known_lt (offset, frame->multi_push_adj_base
7639 + ZCMP_SP_INC_STEP * ZCMP_MAX_SPIMM))
7640 offset = riscv_16bytes_align (offset);
7642 frame->gp_sp_offset = offset - UNITS_PER_WORD;
7643 /* The hard frame pointer points above the callee-saved GPRs. */
7644 frame->hard_frame_pointer_offset = offset;
7645 /* Above the hard frame pointer is the callee-allocated varargs save area. */
7646 offset += riscv_stack_align (cfun->machine->varargs_size);
7647 /* Next is the callee-allocated area for pretend stack arguments. */
7648 offset += riscv_stack_align (crtl->args.pretend_args_size);
7649 /* Arg pointer must be below pretend args, but must be above alignment
7650 padding. */
7651 frame->arg_pointer_offset = offset - crtl->args.pretend_args_size;
7652 frame->total_size = offset;
7654 /* Above that sit the incoming stack pointer and any incoming arguments. */
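/* A hypothetical worked example of the layout computed above (not from the
   sources; assumes RV64 with 16-byte stack alignment): a function with
   16 bytes of outgoing arguments, 24 bytes of locals, ra and s0 saved,
   and no FPRs, VRs, varargs or pretend args:

     offset  = riscv_stack_align (16)        ->  16   outgoing args
     offset += riscv_stack_align (24)        ->  48   locals (24 -> 32)
     frame_pointer_offset                    =   48
     offset += x_save_size = align (2 * 8)   ->  64   ra/s0 save area
     gp_sp_offset                            =   56   (64 - 8)
     hard_frame_pointer_offset               =   64
     arg_pointer_offset = total_size         =   64

   All values are byte offsets from the stack pointer after the prologue
   has allocated the whole frame.  */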
7657 /* Implement TARGET_CAN_INLINE_P.  Determine whether inlining the function
7658 CALLEE into the function CALLER is safe.  Inlining is rejected if the
7659 non-ISA-extension target options differ, if the ISA extensions of
7660 CALLEE are not a subset of those of CALLER, or, absent an always_inline
7661 attribute, if options such as the code model, TLS dialect, and stack
7662 protector settings differ.  Allowing ISA-subset callees to be inlined
7663 improves the performance of Function Multi-Versioning. */
7666 static bool
7667 riscv_can_inline_p (tree caller, tree callee)
7669 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
7670 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
7672 /* It's safe to inline if the callee has no target options. */
7673 if (! callee_tree)
7674 return true;
7676 if (! caller_tree)
7677 caller_tree = target_option_default_node;
7679 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
7680 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
7682 int isa_flag_mask = riscv_x_target_flags_isa_mask ();
7684 /* Callee and caller should have the same target options except for ISA. */
7685 int callee_target_flags = callee_opts->x_target_flags & ~isa_flag_mask;
7686 int caller_target_flags = caller_opts->x_target_flags & ~isa_flag_mask;
7688 if (callee_target_flags != caller_target_flags)
7689 return false;
7691 /* Callee's ISA should be a subset of the caller's ISA. */
7692 if (! riscv_ext_is_subset (caller_opts, callee_opts))
7693 return false;
7695 /* If the callee has always_inline set, we can ignore the remaining options. */
7696 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
7697 return true;
7699 if (caller_opts->x_riscv_cmodel != callee_opts->x_riscv_cmodel)
7700 return false;
7702 if (caller_opts->x_riscv_tls_dialect != callee_opts->x_riscv_tls_dialect)
7703 return false;
7705 if (caller_opts->x_riscv_stack_protector_guard_reg
7706 != callee_opts->x_riscv_stack_protector_guard_reg)
7707 return false;
7709 if (caller_opts->x_riscv_stack_protector_guard_offset
7710 != callee_opts->x_riscv_stack_protector_guard_offset)
7711 return false;
7713 if (caller_opts->x_rvv_vector_strict_align
7714 != callee_opts->x_rvv_vector_strict_align)
7715 return false;
7717 return true;
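/* A minimal sketch of the subset test above (hypothetical; real ISA
   extensions live in cl_target_option structures, not plain bitmasks):
   the callee must enable nothing the caller lacks.

     static bool
     ext_is_subset_sketch (unsigned HOST_WIDE_INT caller_isa,
                           unsigned HOST_WIDE_INT callee_isa)
     {
       return (callee_isa & ~caller_isa) == 0;
     }

   E.g. a caller compiled for rv64gcv may inline an rv64gc callee, but an
   rv64gc caller may not inline an rv64gcv callee.  */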
7720 /* Make sure that we're not trying to eliminate to the wrong hard frame
7721 pointer. */
7723 static bool
7724 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7726 return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
7729 /* Helper to determine whether register X pertains to the stack. */
7730 bool
7731 riscv_reg_frame_related (rtx x)
7733 return REG_P (x)
7734 && (REGNO (x) == FRAME_POINTER_REGNUM
7735 || REGNO (x) == HARD_FRAME_POINTER_REGNUM
7736 || REGNO (x) == ARG_POINTER_REGNUM
7737 || REGNO (x) == VIRTUAL_STACK_VARS_REGNUM);
7740 /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer
7741 or argument pointer. TO is either the stack pointer or hard frame
7742 pointer. */
7744 poly_int64
7745 riscv_initial_elimination_offset (int from, int to)
7747 poly_int64 src, dest;
7749 riscv_compute_frame_info ();
7751 if (to == HARD_FRAME_POINTER_REGNUM)
7752 dest = cfun->machine->frame.hard_frame_pointer_offset;
7753 else if (to == STACK_POINTER_REGNUM)
7754 dest = 0; /* The stack pointer is the base of all offsets, hence 0. */
7755 else
7756 gcc_unreachable ();
7758 if (from == FRAME_POINTER_REGNUM)
7759 src = cfun->machine->frame.frame_pointer_offset;
7760 else if (from == ARG_POINTER_REGNUM)
7761 src = cfun->machine->frame.arg_pointer_offset;
7762 else
7763 gcc_unreachable ();
7765 return src - dest;
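/* A worked example using the hypothetical frame sketched earlier
   (frame_pointer_offset == 48, hard_frame_pointer_offset == 64,
   arg_pointer_offset == 64):

     FRAME_POINTER -> STACK_POINTER      : 48 - 0  =  48
     FRAME_POINTER -> HARD_FRAME_POINTER : 48 - 64 = -16
     ARG_POINTER   -> STACK_POINTER      : 64 - 0  =  64

   i.e. the result is how far the FROM register sits above the TO register
   once the prologue has run.  */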
7768 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
7769 previous frame. */
7771 rtx
7772 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
7774 if (count != 0)
7775 return const0_rtx;
7777 return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
7780 /* Emit code to change the current function's return address to
7781 ADDRESS. SCRATCH is available as a scratch register, if needed.
7782 ADDRESS and SCRATCH are both word-mode GPRs. */
7784 void
7785 riscv_set_return_address (rtx address, rtx scratch)
7787 rtx slot_address;
7789 gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
7790 slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
7791 cfun->machine->frame.gp_sp_offset.to_constant());
7792 riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
7795 /* Save register REG to MEM. Make the instruction frame-related. */
7797 static void
7798 riscv_save_reg (rtx reg, rtx mem)
7800 riscv_emit_move (mem, reg);
7801 riscv_set_frame_expr (riscv_frame_set (mem, reg));
7804 /* Restore register REG from MEM. */
7806 static void
7807 riscv_restore_reg (rtx reg, rtx mem)
7809 rtx insn = riscv_emit_move (reg, mem);
7810 rtx dwarf = NULL_RTX;
7811 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
7813 if (known_gt (epilogue_cfa_sp_offset, 0)
7814 && REGNO (reg) == HARD_FRAME_POINTER_REGNUM)
7816 rtx cfa_adjust_rtx
7817 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
7818 gen_int_mode (epilogue_cfa_sp_offset, Pmode));
7819 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
7822 REG_NOTES (insn) = dwarf;
7823 RTX_FRAME_RELATED_P (insn) = 1;
7826 /* A function to save or restore a register.  The first argument is the
7827 register and the second is the stack slot. */
7828 typedef void (*riscv_save_restore_fn) (rtx, rtx);
7830 /* Use FN to save or restore register REGNO. MODE is the register's
7831 mode and OFFSET is the offset of its save slot from the current
7832 stack pointer. */
7834 static void
7835 riscv_save_restore_reg (machine_mode mode, int regno,
7836 HOST_WIDE_INT offset, riscv_save_restore_fn fn)
7838 rtx mem;
7840 mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
7841 fn (gen_rtx_REG (mode, regno), mem);
7844 /* Return the next register, from REGNO up to LIMIT, that the callee
7845 needs to save or restore.  OFFSET will be adjusted accordingly.
7846 If INC is set, then REGNO will be incremented first.
7847 Returns INVALID_REGNUM if there is no such register. */
7849 static unsigned int
7850 riscv_next_saved_reg (unsigned int regno, unsigned int limit,
7851 HOST_WIDE_INT *offset, bool inc = true)
7853 if (inc)
7854 regno++;
7856 while (regno <= limit)
7858 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
7860 *offset = *offset - UNITS_PER_WORD;
7861 return regno;
7864 regno++;
7866 return INVALID_REGNUM;
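/* A minimal usage sketch of the iterator above (hypothetical; 'use' and
   'top_of_save_area' stand in for the caller's real action and offset):

     HOST_WIDE_INT off = top_of_save_area;  // one word above the top slot
     for (unsigned r = riscv_next_saved_reg (GP_REG_FIRST, GP_REG_LAST,
                                             &off, false);
          r != INVALID_REGNUM;
          r = riscv_next_saved_reg (r, GP_REG_LAST, &off))
       use (r, off);

   Each hit decrements *OFFSET by UNITS_PER_WORD before returning, which
   is why callers start OFFSET one word above the highest save slot.  */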
7869 /* Return TRUE if the provided REGNO is an EH return data register. */
7871 static bool
7872 riscv_is_eh_return_data_register (unsigned int regno)
7874 unsigned int i, regnum;
7876 if (!crtl->calls_eh_return)
7877 return false;
7879 for (i = 0; (regnum = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
7880 if (regno == regnum)
7882 return true;
7885 return false;
7888 /* Call FN for each register that is saved by the current function.
7889 SP_OFFSET is the offset of the current stack pointer from the start
7890 of the frame. */
7892 static void
7893 riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn,
7894 bool epilogue, bool maybe_eh_return)
7896 HOST_WIDE_INT offset, first_fp_offset;
7897 unsigned int regno, num_masked_fp = 0;
7898 unsigned int start = GP_REG_FIRST;
7899 unsigned int limit = GP_REG_LAST;
7901 /* Save the link register and s-registers. */
7902 offset = (cfun->machine->frame.gp_sp_offset - sp_offset).to_constant ()
7903 + UNITS_PER_WORD;
7904 for (regno = riscv_next_saved_reg (start, limit, &offset, false);
7905 regno != INVALID_REGNUM;
7906 regno = riscv_next_saved_reg (regno, limit, &offset))
7908 if (cfun->machine->reg_is_wrapped_separately[regno])
7909 continue;
7911 /* If this is a normal return in a function that calls the eh_return
7912 builtin, then do not restore the eh return data registers as that
7913 would clobber the return value. But we do still need to save them
7914 in the prologue, and restore them for an exception return, so we
7915 need special handling here. */
7916 if (epilogue && !maybe_eh_return
7917 && riscv_is_eh_return_data_register (regno))
7918 continue;
7920 /* In an interrupt function, save and restore the necessary CSRs on the
7921 stack so that their values are not clobbered. */
7922 if (regno == RISCV_PROLOGUE_TEMP_REGNUM
7923 && cfun->machine->interrupt_handler_p
7924 && ((TARGET_HARD_FLOAT && cfun->machine->frame.fmask)
7925 || (TARGET_ZFINX
7926 && (cfun->machine->frame.mask & ~(1 << RISCV_PROLOGUE_TEMP_REGNUM)))))
7928 /* Always assume the FCSR occupies UNITS_PER_WORD so that the stack
7929 offset does not become misaligned later. */
7930 unsigned int fcsr_size = UNITS_PER_WORD;
7931 if (!epilogue)
7933 riscv_save_restore_reg (word_mode, regno, offset, fn);
7934 offset -= fcsr_size;
7935 emit_insn (gen_riscv_frcsr (RISCV_PROLOGUE_TEMP (SImode)));
7936 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7937 offset, riscv_save_reg);
7939 else
7941 riscv_save_restore_reg (SImode, RISCV_PROLOGUE_TEMP_REGNUM,
7942 offset - fcsr_size, riscv_restore_reg);
7943 emit_insn (gen_riscv_fscsr (RISCV_PROLOGUE_TEMP (SImode)));
7944 riscv_save_restore_reg (word_mode, regno, offset, fn);
7945 offset -= fcsr_size;
7947 continue;
7950 if (TARGET_XTHEADMEMPAIR)
7952 /* Get the next reg/offset pair. */
7953 HOST_WIDE_INT offset2 = offset;
7954 unsigned int regno2 = riscv_next_saved_reg (regno, limit, &offset2);
7956 /* Validate everything before emitting a mempair instruction. */
7957 if (regno2 != INVALID_REGNUM
7958 && !cfun->machine->reg_is_wrapped_separately[regno2]
7959 && !(epilogue && !maybe_eh_return
7960 && riscv_is_eh_return_data_register (regno2)))
7962 bool load_p = (fn == riscv_restore_reg);
7963 rtx operands[4];
7964 th_mempair_prepare_save_restore_operands (operands,
7965 load_p, word_mode,
7966 regno, offset,
7967 regno2, offset2);
7969 /* If the operands fit into a mempair insn, then emit one. */
7970 if (th_mempair_operands_p (operands, load_p, word_mode))
7972 th_mempair_save_restore_regs (operands, load_p, word_mode);
7973 offset = offset2;
7974 regno = regno2;
7975 continue;
7980 riscv_save_restore_reg (word_mode, regno, offset, fn);
7983 /* This loop must iterate over the same space as its companion in
7984 riscv_compute_frame_info. */
7985 first_fp_offset
7986 = (cfun->machine->frame.fp_sp_offset - sp_offset).to_constant ();
7987 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
7988 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
7990 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
7991 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
7992 unsigned int slot = (riscv_use_multi_push (&cfun->machine->frame))
7993 ? CALLEE_SAVED_FREG_NUMBER (regno)
7994 : num_masked_fp;
7995 offset = first_fp_offset - slot * GET_MODE_SIZE (mode).to_constant ();
7996 if (handle_reg)
7997 riscv_save_restore_reg (mode, regno, offset, fn);
7998 num_masked_fp++;
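/* The interrupt-handler FCSR handling above is deliberately mirror-imaged.
   In outline (a sketch assuming RISCV_PROLOGUE_TEMP is t0 and
   UNITS_PER_WORD is 8):

     prologue:  save t0 at OFFSET; frcsr t0; save t0 (fcsr) at OFFSET - 8
     epilogue:  load t0 (fcsr) from OFFSET - 8; fscsr t0; load t0 from OFFSET

   so the FCSR round-trips through the temporary without disturbing the
   saved GPR value.  */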
8002 /* Call FN for each V register that is saved by the current function. */
8004 static void
8005 riscv_for_each_saved_v_reg (poly_int64 &remaining_size,
8006 riscv_save_restore_fn fn, bool prologue)
8008 rtx vlen = NULL_RTX;
8009 if (cfun->machine->frame.vmask != 0)
8011 if (UNITS_PER_V_REG.is_constant ()
8012 && SMALL_OPERAND (UNITS_PER_V_REG.to_constant ()))
8013 vlen = GEN_INT (UNITS_PER_V_REG.to_constant ());
8014 else
8016 vlen = RISCV_PROLOGUE_TEMP (Pmode);
8017 rtx insn
8018 = emit_move_insn (vlen, gen_int_mode (UNITS_PER_V_REG, Pmode));
8019 RTX_FRAME_RELATED_P (insn) = 1;
8023 /* Select the mode where LMUL is 1 and SEW is largest. */
8024 machine_mode m1_mode = TARGET_VECTOR_ELEN_64 ? RVVM1DImode : RVVM1SImode;
8026 if (prologue)
8028 /* This loop must iterate over the same space as its companion in
8029 riscv_compute_frame_info. */
8030 for (unsigned int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
8031 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
8033 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
8034 if (handle_reg)
8036 rtx insn = NULL_RTX;
8037 if (CONST_INT_P (vlen))
8039 gcc_assert (SMALL_OPERAND (-INTVAL (vlen)));
8040 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8041 stack_pointer_rtx,
8042 GEN_INT (-INTVAL (vlen))));
8044 else
8045 insn = emit_insn (
8046 gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
8047 gcc_assert (insn != NULL_RTX);
8048 RTX_FRAME_RELATED_P (insn) = 1;
8049 riscv_save_restore_reg (m1_mode, regno, 0, fn);
8050 remaining_size -= UNITS_PER_V_REG;
8054 else
8056 /* This loop must iterate over the same space as its companion in
8057 riscv_compute_frame_info. */
8058 for (unsigned int regno = V_REG_LAST; regno >= V_REG_FIRST; regno--)
8059 if (BITSET_P (cfun->machine->frame.vmask, regno - V_REG_FIRST))
8061 bool handle_reg = !cfun->machine->reg_is_wrapped_separately[regno];
8062 if (handle_reg)
8064 riscv_save_restore_reg (m1_mode, regno, 0, fn);
8065 rtx insn = emit_insn (
8066 gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, vlen));
8067 gcc_assert (insn != NULL_RTX);
8068 RTX_FRAME_RELATED_P (insn) = 1;
8069 remaining_size -= UNITS_PER_V_REG;
8075 /* For stack frames that can't be allocated with a single ADDI instruction,
8076 compute the best value to initially allocate. It must at a minimum
8077 allocate enough space to spill the callee-saved registers. If TARGET_RVC,
8078 try to pick a value that will allow compression of the register saves
8079 without adding extra instructions. */
8081 static HOST_WIDE_INT
8082 riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
8084 HOST_WIDE_INT remaining_const_size;
8085 if (!remaining_size.is_constant ())
8086 remaining_const_size
8087 = riscv_stack_align (remaining_size.coeffs[0])
8088 - riscv_stack_align (remaining_size.coeffs[1]);
8089 else
8090 remaining_const_size = remaining_size.to_constant ();
8092 /* The first step must reach the top of the vector register save area
8093 if any vector registers need to be preserved. */
8094 if (frame->vmask != 0)
8095 return (remaining_size - frame->v_sp_offset_top).to_constant ();
8097 if (SMALL_OPERAND (remaining_const_size))
8098 return remaining_const_size;
8100 poly_int64 callee_saved_first_step =
8101 remaining_size - frame->frame_pointer_offset;
8102 gcc_assert(callee_saved_first_step.is_constant ());
8103 HOST_WIDE_INT min_first_step =
8104 riscv_stack_align (callee_saved_first_step.to_constant ());
8105 HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
8106 HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
8107 gcc_assert (min_first_step <= max_first_step);
8109 /* As an optimization, use the least-significant bits of the total frame
8110 size, so that the second adjustment step is just LUI + ADD. */
8111 if (!SMALL_OPERAND (min_second_step)
8112 && remaining_const_size % IMM_REACH <= max_first_step
8113 && remaining_const_size % IMM_REACH >= min_first_step)
8114 return remaining_const_size % IMM_REACH;
8116 if (TARGET_RVC || TARGET_ZCA)
8118 /* If we need two subtracts, and one is small enough to allow compressed
8119 loads and stores, then put that one first. */
8120 if (IN_RANGE (min_second_step, 0,
8121 (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
8122 return MAX (min_second_step, min_first_step);
8124 /* If we need LUI + ADDI + ADD for the second adjustment step, then start
8125 with the minimum first step, so that we can get compressed loads and
8126 stores. */
8127 else if (!SMALL_OPERAND (min_second_step))
8128 return min_first_step;
8131 return max_first_step;
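/* A worked example of the least-significant-bits trick above (hypothetical
   numbers; assumes RV64 with IMM_REACH == 4096 and a 16-byte-aligned
   stack): a 4160-byte constant frame whose callee saves need 64 bytes.

     min_first_step  = 64
     max_first_step  = 4096 / 2 - 16 = 2032
     min_second_step = 4160 - 2032 = 2128     not a SMALL_OPERAND
     4160 % 4096 = 64, and 64 is in [64, 2032]

   so the function returns 64, leaving a second step of exactly 4096 that
   can be materialized with a single LUI + ADD.  */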
8134 static rtx
8135 riscv_adjust_libcall_cfi_prologue ()
8137 rtx dwarf = NULL_RTX;
8138 rtx adjust_sp_rtx, reg, mem, insn;
8139 int saved_size = cfun->machine->frame.save_libcall_adjustment;
8140 int offset;
8142 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8143 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8145 /* The save order is ra, s0, s1, s2 to s11. */
8146 if (regno == RETURN_ADDR_REGNUM)
8147 offset = saved_size - UNITS_PER_WORD;
8148 else if (regno == S0_REGNUM)
8149 offset = saved_size - UNITS_PER_WORD * 2;
8150 else if (regno == S1_REGNUM)
8151 offset = saved_size - UNITS_PER_WORD * 3;
8152 else
8153 offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
8155 reg = gen_rtx_REG (Pmode, regno);
8156 mem = gen_frame_mem (Pmode, plus_constant (Pmode,
8157 stack_pointer_rtx,
8158 offset));
8160 insn = gen_rtx_SET (mem, reg);
8161 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
8164 /* Debug info for adjust sp. */
8165 adjust_sp_rtx =
8166 gen_rtx_SET (stack_pointer_rtx,
8167 gen_rtx_PLUS (GET_MODE(stack_pointer_rtx), stack_pointer_rtx, GEN_INT (-saved_size)));
8168 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
8169 dwarf);
8170 return dwarf;
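/* A worked example (hypothetical; assumes RV64, so UNITS_PER_WORD == 8):
   a libcall saving ra, s0, s1 and s2 gives saved_size == 32 and slot
   offsets

     ra: 32 - 8  = 24      s0: 32 - 16 = 16
     s1: 32 - 24 =  8      s2: 32 - (0 + 4) * 8 = 0

   matching the ra, s0, s1, s2..s11 save order noted above.  */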
8173 static rtx
8174 riscv_adjust_multi_push_cfi_prologue (int saved_size)
8176 rtx dwarf = NULL_RTX;
8177 rtx adjust_sp_rtx, reg, mem, insn;
8178 unsigned int mask = cfun->machine->frame.mask;
8179 int offset;
8180 int saved_cnt = 0;
8182 if (mask & S10_MASK)
8183 mask |= S11_MASK;
8185 for (int regno = GP_REG_LAST; regno >= GP_REG_FIRST; regno--)
8186 if (BITSET_P (mask & MULTI_PUSH_GPR_MASK, regno - GP_REG_FIRST))
8188 /* The save order is s11 down to s0, then ra,
8189 from high to low addresses. */
8190 offset = saved_size - UNITS_PER_WORD * (++saved_cnt);
8192 reg = gen_rtx_REG (Pmode, regno);
8193 mem = gen_frame_mem (Pmode,
8194 plus_constant (Pmode, stack_pointer_rtx, offset));
8196 insn = gen_rtx_SET (mem, reg);
8197 dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
8200 /* Debug info for adjust sp. */
8201 adjust_sp_rtx
8202 = gen_rtx_SET (stack_pointer_rtx,
8203 plus_constant (Pmode, stack_pointer_rtx, -saved_size));
8204 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
8205 return dwarf;
8208 static void
8209 riscv_emit_stack_tie (rtx reg)
8211 if (Pmode == SImode)
8212 emit_insn (gen_stack_tiesi (stack_pointer_rtx, reg));
8213 else
8214 emit_insn (gen_stack_tiedi (stack_pointer_rtx, reg));
8217 /* Zcmp multi push/pop: table of code_for_push_pop function pointers. */
8218 static const code_for_push_pop_t code_for_push_pop[ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM]
8219 = {{code_for_gpr_multi_push_up_to_ra, code_for_gpr_multi_pop_up_to_ra,
8220 code_for_gpr_multi_popret_up_to_ra, code_for_gpr_multi_popretz_up_to_ra},
8221 {code_for_gpr_multi_push_up_to_s0, code_for_gpr_multi_pop_up_to_s0,
8222 code_for_gpr_multi_popret_up_to_s0, code_for_gpr_multi_popretz_up_to_s0},
8223 {code_for_gpr_multi_push_up_to_s1, code_for_gpr_multi_pop_up_to_s1,
8224 code_for_gpr_multi_popret_up_to_s1, code_for_gpr_multi_popretz_up_to_s1},
8225 {code_for_gpr_multi_push_up_to_s2, code_for_gpr_multi_pop_up_to_s2,
8226 code_for_gpr_multi_popret_up_to_s2, code_for_gpr_multi_popretz_up_to_s2},
8227 {code_for_gpr_multi_push_up_to_s3, code_for_gpr_multi_pop_up_to_s3,
8228 code_for_gpr_multi_popret_up_to_s3, code_for_gpr_multi_popretz_up_to_s3},
8229 {code_for_gpr_multi_push_up_to_s4, code_for_gpr_multi_pop_up_to_s4,
8230 code_for_gpr_multi_popret_up_to_s4, code_for_gpr_multi_popretz_up_to_s4},
8231 {code_for_gpr_multi_push_up_to_s5, code_for_gpr_multi_pop_up_to_s5,
8232 code_for_gpr_multi_popret_up_to_s5, code_for_gpr_multi_popretz_up_to_s5},
8233 {code_for_gpr_multi_push_up_to_s6, code_for_gpr_multi_pop_up_to_s6,
8234 code_for_gpr_multi_popret_up_to_s6, code_for_gpr_multi_popretz_up_to_s6},
8235 {code_for_gpr_multi_push_up_to_s7, code_for_gpr_multi_pop_up_to_s7,
8236 code_for_gpr_multi_popret_up_to_s7, code_for_gpr_multi_popretz_up_to_s7},
8237 {code_for_gpr_multi_push_up_to_s8, code_for_gpr_multi_pop_up_to_s8,
8238 code_for_gpr_multi_popret_up_to_s8, code_for_gpr_multi_popretz_up_to_s8},
8239 {code_for_gpr_multi_push_up_to_s9, code_for_gpr_multi_pop_up_to_s9,
8240 code_for_gpr_multi_popret_up_to_s9, code_for_gpr_multi_popretz_up_to_s9},
8241 {nullptr, nullptr, nullptr, nullptr},
8242 {code_for_gpr_multi_push_up_to_s11, code_for_gpr_multi_pop_up_to_s11,
8243 code_for_gpr_multi_popret_up_to_s11,
8244 code_for_gpr_multi_popretz_up_to_s11}};
8246 /* Emit a probe loop for stack clash protection. */
8247 static void
8248 riscv_allocate_and_probe_stack_loop (rtx tmp, enum rtx_code code,
8249 rtx op0, rtx op1, bool vector,
8250 HOST_WIDE_INT offset)
8252 tmp = riscv_force_temporary (tmp, gen_int_mode (offset, Pmode));
8254 /* Loop. */
8255 rtx label = gen_label_rtx ();
8256 emit_label (label);
8258 /* Allocate and probe stack. */
8259 emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, tmp));
8260 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8261 STACK_CLASH_CALLER_GUARD));
8262 emit_insn (gen_blockage ());
8264 /* Adjust the remaining vector length. */
8265 if (vector)
8266 emit_insn (gen_sub3_insn (op0, op0, tmp));
8268 /* Branch if there's still more bytes to probe. */
8269 riscv_expand_conditional_branch (label, code, op0, op1);
8270 JUMP_LABEL (get_last_insn ()) = label;
8272 emit_insn (gen_blockage ());
8275 /* Adjust the scalable vector frame for the prologue and epilogue. */
8277 static void
8278 riscv_v_adjust_scalable_frame (rtx target, poly_int64 offset, bool epilogue)
8280 rtx tmp = RISCV_PROLOGUE_TEMP (Pmode);
8281 rtx adjust_size = RISCV_PROLOGUE_TEMP2 (Pmode);
8282 rtx insn, dwarf, adjust_frame_rtx;
8284 riscv_legitimize_poly_move (Pmode, adjust_size, tmp,
8285 gen_int_mode (offset, Pmode));
8287 /* If doing stack clash protection then we use a loop to allocate and probe
8288 the stack. */
8289 if (flag_stack_clash_protection)
8291 if (epilogue)
8293 insn = emit_insn (gen_add3_insn (target, target, adjust_size));
8295 if (!frame_pointer_needed)
8297 add_reg_note (insn, REG_CFA_DEF_CFA,
8298 plus_constant (Pmode, stack_pointer_rtx, -offset));
8299 RTX_FRAME_RELATED_P (insn) = 1;
8302 return;
8305 HOST_WIDE_INT min_probe_threshold
8306 = (1 << param_stack_clash_protection_guard_size) - STACK_CLASH_CALLER_GUARD;
8308 if (!frame_pointer_needed)
8310 /* This is done to provide unwinding information for the stack
8311 adjustments we're about to do.  However, to prevent the optimizers
8312 from removing the T3 move and leaving the CFA note (which would be
8313 very wrong), we tie the old and new stack pointers together.
8314 The tie will expand to nothing, but the optimizers will not touch
8315 the instruction. */
8316 insn = get_last_insn ();
8317 rtx stack_ptr_copy = gen_rtx_REG (Pmode, RISCV_STACK_CLASH_VECTOR_CFA_REGNUM);
8318 emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
8319 riscv_emit_stack_tie (stack_ptr_copy);
8321 /* We want the CFA independent of the stack pointer for the
8322 duration of the loop. */
8323 add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
8324 RTX_FRAME_RELATED_P (insn) = 1;
8327 riscv_allocate_and_probe_stack_loop (tmp, GE, adjust_size, tmp, true,
8328 min_probe_threshold);
8330 /* Allocate the residual. */
8331 insn = emit_insn (gen_sub3_insn (target, target, adjust_size));
8333 /* Now reset the CFA register if needed. */
8334 if (!frame_pointer_needed)
8336 add_reg_note (insn, REG_CFA_DEF_CFA,
8337 plus_constant (Pmode, stack_pointer_rtx, offset));
8338 RTX_FRAME_RELATED_P (insn) = 1;
8341 return;
8344 if (epilogue)
8345 insn = gen_add3_insn (target, target, adjust_size);
8346 else
8347 insn = gen_sub3_insn (target, target, adjust_size);
8349 insn = emit_insn (insn);
8351 RTX_FRAME_RELATED_P (insn) = 1;
8353 adjust_frame_rtx
8354 = gen_rtx_SET (target,
8355 plus_constant (Pmode, target, epilogue ? offset : -offset));
8357 dwarf = alloc_reg_note (REG_FRAME_RELATED_EXPR, copy_rtx (adjust_frame_rtx),
8358 NULL_RTX);
8360 REG_NOTES (insn) = dwarf;
8363 static rtx
8364 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
8365 unsigned int regs_num)
8367 gcc_assert (op < ZCMP_OP_NUM);
8368 gcc_assert (regs_num <= ZCMP_MAX_GRP_SLOTS
8369 && regs_num != ZCMP_INVALID_S0S10_SREGS_COUNTS + 1); /* 1 for ra.  */
8370 rtx stack_adj = GEN_INT (adj_size);
8371 return GEN_FCN (code_for_push_pop[regs_num - 1][op](Pmode)) (stack_adj);
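/* An indexing example for the table above: cm.push {ra, s0-s2} covers
   four registers, so regs_num == 4 and

     code_for_push_pop[4 - 1][PUSH_IDX] == code_for_gpr_multi_push_up_to_s2

   The all-nullptr row is the "ra, s0-s10" slot, which Zcmp cannot encode:
   its register lists jump from {ra, s0-s9} straight to {ra, s0-s11}, and
   the assertion above rejects exactly that count.  */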
8374 static unsigned
8375 get_multi_push_fpr_mask (unsigned max_fprs_push)
8377 unsigned mask_fprs_push = 0, num_f_pushed = 0;
8378 for (unsigned regno = FP_REG_FIRST;
8379 regno <= FP_REG_LAST && num_f_pushed < max_fprs_push; regno++)
8380 if (riscv_save_reg_p (regno))
8381 mask_fprs_push |= 1 << (regno - FP_REG_FIRST), num_f_pushed++;
8382 return mask_fprs_push;
8385 /* Allocate SIZE bytes of stack space using TEMP1 as a scratch register.
8386 If SIZE is not large enough to require a probe this function will only
8387 adjust the stack.
8389 We emit barriers after each stack adjustment to prevent optimizations from
8390 breaking the invariant that we never drop the stack more than a page.  This
8391 invariant is needed to correctly handle asynchronous events: if we allowed
8392 the stack to drop by more than a page with multiple probes still pending
8393 and took a signal somewhere in between, the signal handler would not know
8394 the state of the stack and could make no assumptions about which pages
8395 had been probed. */
8397 static void
8398 riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
8400 HOST_WIDE_INT guard_size
8401 = 1 << param_stack_clash_protection_guard_size;
8402 HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
8403 HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
8404 HOST_WIDE_INT min_probe_threshold = guard_size - guard_used_by_caller;
8405 rtx insn;
8407 /* We should always have a positive probe threshold. */
8408 gcc_assert (min_probe_threshold > 0);
8410 /* If SIZE is not large enough to require probing, just adjust the stack and
8411 exit. */
8412 if (known_lt (size, min_probe_threshold)
8413 || !flag_stack_clash_protection)
8415 if (flag_stack_clash_protection)
8417 if (known_eq (cfun->machine->frame.total_size, 0))
8418 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8419 else
8420 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
8423 if (SMALL_OPERAND (-size))
8425 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-size));
8426 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8428 else if (SUM_OF_TWO_S12_ALGN (-size))
8430 HOST_WIDE_INT one, two;
8431 riscv_split_sum_of_two_s12 (-size, &one, &two);
8432 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8433 GEN_INT (one));
8434 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8435 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8436 GEN_INT (two));
8437 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8439 else
8441 temp1 = riscv_force_temporary (temp1, GEN_INT (-size));
8442 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, temp1));
8443 insn = plus_constant (Pmode, stack_pointer_rtx, -size);
8444 insn = gen_rtx_SET (stack_pointer_rtx, insn);
8445 riscv_set_frame_expr (insn);
8448 /* We must have allocated the remainder of the stack frame.
8449 Emit a stack tie if we have a frame pointer so that the
8450 allocation is ordered WRT fp setup and subsequent writes
8451 into the frame. */
8452 if (frame_pointer_needed)
8453 riscv_emit_stack_tie (hard_frame_pointer_rtx);
8455 return;
8458 gcc_assert (multiple_p (size, byte_sp_alignment));
8460 if (dump_file)
8461 fprintf (dump_file,
8462 "Stack clash prologue: " HOST_WIDE_INT_PRINT_DEC
8463 " bytes, probing will be required.\n", size);
8465 /* Round size to the nearest multiple of guard_size, and calculate the
8466 residual as the difference between the original size and the rounded
8467 size. */
8468 HOST_WIDE_INT rounded_size = ROUND_DOWN (size, guard_size);
8469 HOST_WIDE_INT residual = size - rounded_size;
8471 /* We can handle a small number of allocations/probes inline. Otherwise
8472 punt to a loop. */
8473 if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * guard_size)
8475 temp1 = riscv_force_temporary (temp1, gen_int_mode (guard_size, Pmode));
8476 for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size)
8478 emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, temp1));
8479 insn = plus_constant (Pmode, stack_pointer_rtx, -guard_size);
8480 insn = gen_rtx_SET (stack_pointer_rtx, insn);
8481 riscv_set_frame_expr (insn);
8482 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8483 guard_used_by_caller));
8484 emit_insn (gen_blockage ());
8486 dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
8488 else
8490 /* Compute the ending address. */
8491 rtx temp2 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP2_REGNUM);
8492 temp2 = riscv_force_temporary (temp2, gen_int_mode (rounded_size, Pmode));
8493 insn = emit_insn (gen_sub3_insn (temp2, stack_pointer_rtx, temp2));
8495 if (!frame_pointer_needed)
8497 /* We want the CFA independent of the stack pointer for the
8498 duration of the loop. */
8499 add_reg_note (insn, REG_CFA_DEF_CFA,
8500 plus_constant (Pmode, temp1, rounded_size));
8501 RTX_FRAME_RELATED_P (insn) = 1;
8504 /* This allocates and probes the stack. */
8505 riscv_allocate_and_probe_stack_loop (temp1, NE, stack_pointer_rtx, temp2,
8506 false, guard_size);
8508 /* Now reset the CFA register if needed. */
8509 if (!frame_pointer_needed)
8511 insn = get_last_insn ();
8512 add_reg_note (insn, REG_CFA_DEF_CFA,
8513 plus_constant (Pmode, stack_pointer_rtx, rounded_size));
8514 RTX_FRAME_RELATED_P (insn) = 1;
8517 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
8520 /* Handle any residuals.  Residuals of at least MIN_PROBE_THRESHOLD have to
8521 be probed.  This maintains the requirement that each page is probed at
8522 least once.  For initial probing we probe only if the allocation is
8523 more than GUARD_SIZE - buffer, and below the saved registers we probe
8524 if the amount is larger than buffer.  GUARD_SIZE - buffer + buffer ==
8525 GUARD_SIZE.  This ensures that for any allocation large enough to
8526 trigger a probe here, we'll have at least one; and if an allocation is
8527 not large enough for this code to emit anything, the page would have
8528 been probed by the saving of FP/RA, either by this function or by a
8529 callee.  If we don't have any callees then we won't have more stack
8530 adjustments and so are still safe. */
8531 if (residual)
8533 gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
8535 temp1 = riscv_force_temporary (temp1, gen_int_mode (residual, Pmode));
8536 emit_insn (gen_sub3_insn (stack_pointer_rtx, stack_pointer_rtx, temp1));
8537 insn = plus_constant (Pmode, stack_pointer_rtx, -residual);
8538 insn = gen_rtx_SET (stack_pointer_rtx, insn);
8539 riscv_set_frame_expr (insn);
8540 if (residual >= min_probe_threshold)
8542 if (dump_file)
8543 fprintf (dump_file,
8544 "Stack clash prologue residuals: "
8545 HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
8546 "\n", residual);
8548 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
8549 guard_used_by_caller));
8550 emit_insn (gen_blockage ());
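/* A worked example (hypothetical sizes; assumes the default 4 KiB guard
   and 1 KiB STACK_CLASH_CALLER_GUARD): for a 10000-byte frame,

     rounded_size = ROUND_DOWN (10000, 4096) = 8192   (2 pages)
     residual     = 10000 - 8192             = 1808

   2 pages is within STACK_CLASH_MAX_UNROLL_PAGES, so two inline
   sub-sp/probe pairs are emitted, then an 1808-byte adjustment; 1808 is
   below min_probe_threshold (4096 - 1024 = 3072), so the residual itself
   needs no extra probe.  */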
8555 /* Expand the "prologue" pattern. */
8557 void
8558 riscv_expand_prologue (void)
8560 struct riscv_frame_info *frame = &cfun->machine->frame;
8561 poly_int64 remaining_size = frame->total_size;
8562 unsigned mask = frame->mask;
8563 unsigned fmask = frame->fmask;
8564 int spimm, multi_push_additional, stack_adj;
8565 rtx insn, dwarf = NULL_RTX;
8566 unsigned th_int_mask = 0;
8568 if (flag_stack_usage_info)
8569 current_function_static_stack_size = constant_lower_bound (remaining_size);
8571 if (cfun->machine->naked_p)
8572 return;
8574 /* Prefer multi-push to the save-restore libcall. */
8575 if (riscv_use_multi_push (frame))
8577 remaining_size -= frame->multi_push_adj_base;
8578 /* If there are vector registers that need to be saved, the SP can only
8579 be lowered to the frame->v_sp_offset_top position at most, since the
8580 vector registers will be saved one by one by decrementing the SP
8581 later. */
8582 poly_int64 remaining_size_above_varea
8583 = frame->vmask != 0
8584 ? remaining_size - frame->v_sp_offset_top
8585 : remaining_size;
8587 if (known_gt (remaining_size_above_varea, 2 * ZCMP_SP_INC_STEP))
8588 spimm = 3;
8589 else if (known_gt (remaining_size_above_varea, ZCMP_SP_INC_STEP))
8590 spimm = 2;
8591 else if (known_gt (remaining_size_above_varea, 0))
8592 spimm = 1;
8593 else
8594 spimm = 0;
8595 multi_push_additional = spimm * ZCMP_SP_INC_STEP;
8596 frame->multi_push_adj_addi = multi_push_additional;
8597 remaining_size -= multi_push_additional;
8599 /* Emit the multi-push insn and the DWARF info along with it. */
8600 stack_adj = frame->multi_push_adj_base + multi_push_additional;
8601 insn = emit_insn (riscv_gen_multi_push_pop_insn (
8602 PUSH_IDX, -stack_adj, riscv_multi_push_regs_count (frame->mask)));
8603 dwarf = riscv_adjust_multi_push_cfi_prologue (stack_adj);
8604 RTX_FRAME_RELATED_P (insn) = 1;
8605 REG_NOTES (insn) = dwarf;
8607 /* Temporarily fib that we need not save GPRs. */
8608 frame->mask = 0;
8610 /* Push FPRs into the additional space reserved by cm.push. */
8611 if (fmask)
8613 unsigned mask_fprs_push
8614 = get_multi_push_fpr_mask (multi_push_additional / UNITS_PER_WORD);
8615 frame->fmask &= mask_fprs_push;
8616 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false,
8617 false);
8618 frame->fmask = fmask & ~mask_fprs_push; /* Mask for the remaining FPRs. */
8621 /* When optimizing for size, call a subroutine to save the registers. */
8622 else if (riscv_use_save_libcall (frame))
8624 rtx dwarf = NULL_RTX;
8625 dwarf = riscv_adjust_libcall_cfi_prologue ();
8627 remaining_size -= frame->save_libcall_adjustment;
8628 insn = emit_insn (riscv_gen_gpr_save_insn (frame));
8629 frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
8631 RTX_FRAME_RELATED_P (insn) = 1;
8632 REG_NOTES (insn) = dwarf;
8635 th_int_mask = th_int_get_mask (frame->mask);
8636 if (th_int_mask && TH_INT_INTERRUPT (cfun))
8638 frame->mask &= ~th_int_mask;
8640 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
8641 interrupts, such as the fcsr. */
8642 if ((TARGET_HARD_FLOAT && frame->fmask)
8643 || (TARGET_ZFINX && frame->mask))
8644 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
8646 unsigned save_adjustment = th_int_get_save_adjustment ();
8647 frame->gp_sp_offset -= save_adjustment;
8648 remaining_size -= save_adjustment;
8650 insn = emit_insn (gen_th_int_push ());
8652 rtx dwarf = th_int_adjust_cfi_prologue (th_int_mask);
8653 RTX_FRAME_RELATED_P (insn) = 1;
8654 REG_NOTES (insn) = dwarf;
8657 /* Save the GP, FP registers. */
8658 if ((frame->mask | frame->fmask) != 0)
8660 if (known_gt (remaining_size, frame->frame_pointer_offset))
8662 HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
8663 remaining_size -= step1;
8664 insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8665 GEN_INT (-step1));
8666 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8668 riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
8671 /* Undo the above fib. */
8672 frame->mask = mask;
8673 frame->fmask = fmask;
8675 /* Set up the frame pointer, if we're using one. */
8676 if (frame_pointer_needed)
8678 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8679 GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
8680 RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
8682 riscv_emit_stack_tie (hard_frame_pointer_rtx);
8685 /* Save the V registers. */
8686 if (frame->vmask != 0)
8687 riscv_for_each_saved_v_reg (remaining_size, riscv_save_reg, true);
8689 /* Allocate the rest of the frame. */
8690 if (known_gt (remaining_size, 0))
8692 /* Two-step adjustment:
8693 1. scalable frame; 2. constant frame. */
8694 poly_int64 scalable_frame (0, 0);
8695 if (!remaining_size.is_constant ())
8697 /* First for scalable frame. */
8698 poly_int64 scalable_frame = remaining_size;
8699 scalable_frame.coeffs[0] = remaining_size.coeffs[1];
8700 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
8701 remaining_size -= scalable_frame;
8704 /* Second step for constant frame. */
8705 HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
8706 if (constant_frame == 0)
8708 /* We must have allocated stack space for the scalable frame.
8709 Emit a stack tie if we have a frame pointer so that the
8710 allocation is ordered WRT fp setup and subsequent writes
8711 into the frame. */
8712 if (frame_pointer_needed)
8713 riscv_emit_stack_tie (hard_frame_pointer_rtx);
8714 return;
8717 riscv_allocate_and_probe_stack_space (RISCV_PROLOGUE_TEMP (Pmode), constant_frame);
8719 else if (flag_stack_clash_protection)
8721 if (known_eq (frame->total_size, 0))
8722 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
8723 else
8724 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
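/* How the extra cm.push adjustment is chosen above (a sketch assuming
   ZCMP_SP_INC_STEP == 16): spimm selects 0-3 additional 16-byte steps
   beyond the register save area.

     remaining above the V area    spimm    extra adjustment
     0                             0        0
     (0, 16]                       1        16
     (16, 32]                      2        32
     > 32                          3        48

   Anything spimm cannot cover stays in remaining_size and is allocated by
   the normal frame-allocation code later in the prologue.  */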
8728 static rtx
8729 riscv_adjust_multi_pop_cfi_epilogue (int saved_size)
8731 rtx dwarf = NULL_RTX;
8732 rtx adjust_sp_rtx, reg;
8733 unsigned int mask = cfun->machine->frame.mask;
8735 if (mask & S10_MASK)
8736 mask |= S11_MASK;
8738 /* Debug info for adjust sp. */
8739 adjust_sp_rtx
8740 = gen_rtx_SET (stack_pointer_rtx,
8741 plus_constant (Pmode, stack_pointer_rtx, saved_size));
8742 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx, dwarf);
8744 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8745 if (BITSET_P (mask, regno - GP_REG_FIRST))
8747 reg = gen_rtx_REG (Pmode, regno);
8748 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
8751 return dwarf;
8754 static rtx
8755 riscv_adjust_libcall_cfi_epilogue ()
8757 rtx dwarf = NULL_RTX;
8758 rtx adjust_sp_rtx, reg;
8759 int saved_size = cfun->machine->frame.save_libcall_adjustment;
8761 /* Debug info for adjust sp. */
8762 adjust_sp_rtx =
8763 gen_rtx_SET (stack_pointer_rtx,
8764 gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (saved_size)));
8765 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
8766 dwarf);
8768 for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
8769 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
8771 reg = gen_rtx_REG (Pmode, regno);
8772 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
8775 return dwarf;
8778 static void
8779 riscv_gen_multi_pop_insn (bool use_multi_pop_normal, unsigned mask,
8780 unsigned multipop_size)
8782 rtx insn;
8783 unsigned regs_count = riscv_multi_push_regs_count (mask);
8785 if (!use_multi_pop_normal)
8786 insn = emit_insn (
8787 riscv_gen_multi_push_pop_insn (POP_IDX, multipop_size, regs_count));
8788 else
8789 insn = emit_jump_insn (
8790 riscv_gen_multi_push_pop_insn (POPRET_IDX, multipop_size, regs_count));
8792 rtx dwarf = riscv_adjust_multi_pop_cfi_epilogue (multipop_size);
8793 RTX_FRAME_RELATED_P (insn) = 1;
8794 REG_NOTES (insn) = dwarf;
8797 /* Expand an "epilogue", "sibcall_epilogue", or "eh_return_internal" pattern;
8798 style says which. */
8800 void
8801 riscv_expand_epilogue (int style)
8803 /* Split the frame into 3 steps. STEP1 is the amount of stack we should
8804 deallocate before restoring the registers. STEP2 is the amount we
8805 should deallocate afterwards, including the callee-saved regs.  STEP3
8806 is the amount deallocated by the save-restore libcall.
8808 Start off by assuming that no registers need to be restored. */
8809 struct riscv_frame_info *frame = &cfun->machine->frame;
8810 unsigned mask = frame->mask;
8811 unsigned fmask = frame->fmask;
8812 unsigned mask_fprs_push = 0;
8813 poly_int64 step2 = 0;
8814 bool use_multi_pop_normal
8815 = ((style == NORMAL_RETURN) && riscv_use_multi_push (frame));
8816 bool use_multi_pop_sibcall
8817 = ((style == SIBCALL_RETURN) && riscv_use_multi_push (frame));
8818 bool use_multi_pop = use_multi_pop_normal || use_multi_pop_sibcall;
8820 bool use_restore_libcall
8821 = !use_multi_pop
8822 && ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame));
8823 unsigned libcall_size = use_restore_libcall && !use_multi_pop
8824 ? frame->save_libcall_adjustment
8825 : 0;
8826 unsigned multipop_size
8827 = use_multi_pop ? frame->multi_push_adj_base + frame->multi_push_adj_addi
8828 : 0;
8829 rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
8830 unsigned th_int_mask = 0;
8831 rtx insn;
8833 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
8834 bool need_barrier_p = known_ne (get_frame_size ()
8835 + cfun->machine->frame.arg_pointer_offset, 0);
8837 if (cfun->machine->naked_p)
8839 gcc_assert (style == NORMAL_RETURN);
8841 emit_jump_insn (gen_return ());
8843 return;
8846 if ((style == NORMAL_RETURN) && riscv_can_use_return_insn ())
8848 emit_jump_insn (gen_return ());
8849 return;
8852 /* Reset the epilogue cfa info before starting to emit the epilogue. */
8853 epilogue_cfa_sp_offset = 0;
8855 /* Move past any dynamic stack allocations. */
8856 if (cfun->calls_alloca)
8858 /* Emit a barrier to prevent loads from a deallocated stack. */
8859 riscv_emit_stack_tie (hard_frame_pointer_rtx);
8860 need_barrier_p = false;
8862 poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
8863 rtx dwarf_adj = gen_int_mode (adjust_offset, Pmode);
8864 rtx adjust = NULL_RTX;
8865 bool sum_of_two_s12 = false;
8866 HOST_WIDE_INT one, two;
8868 if (!adjust_offset.is_constant ())
8870 rtx tmp1 = RISCV_PROLOGUE_TEMP (Pmode);
8871 rtx tmp2 = RISCV_PROLOGUE_TEMP2 (Pmode);
8872 riscv_legitimize_poly_move (Pmode, tmp1, tmp2,
8873 gen_int_mode (adjust_offset, Pmode));
8874 adjust = tmp1;
8876 else
8878 HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
8879 if (SMALL_OPERAND (adj_off_value))
8881 adjust = GEN_INT (adj_off_value);
8883 else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
8885 riscv_split_sum_of_two_s12 (adj_off_value, &one, &two);
8886 dwarf_adj = adjust = GEN_INT (one);
8887 sum_of_two_s12 = true;
8889 else
8891 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
8892 GEN_INT (adj_off_value));
8893 adjust = RISCV_PROLOGUE_TEMP (Pmode);
8897 insn = emit_insn (
8898 gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
8899 adjust));
8901 rtx dwarf = NULL_RTX;
8902 rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
8903 dwarf_adj);
8904 rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
8905 dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
8907 RTX_FRAME_RELATED_P (insn) = 1;
8909 REG_NOTES (insn) = dwarf;
8911 if (sum_of_two_s12)
8913 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
8914 GEN_INT (two)));
8915 RTX_FRAME_RELATED_P (insn) = 1;
8919 if (use_restore_libcall || use_multi_pop)
8920 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
8922 /* If we need to restore registers, deallocate as much stack as
8923 possible in the second step without going out of range. */
8924 if (use_multi_pop)
8926 if (frame->fmask
8927 && known_gt (frame->total_size - multipop_size,
8928 frame->frame_pointer_offset))
8929 step2
8930 = riscv_first_stack_step (frame, frame->total_size - multipop_size);
8932 else if ((frame->mask | frame->fmask) != 0)
8933 step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
8935 if (use_restore_libcall || use_multi_pop)
8936 frame->mask = mask; /* Undo the above fib. */
8938 poly_int64 step1;
8939 /* STEP1 must be set to the bottom of the vector register save area if
8940 any vector registers need to be preserved. */
8941 if (frame->vmask != 0)
8943 step1 = frame->v_sp_offset_bottom;
8944 step2 = frame->total_size - step1 - libcall_size - multipop_size;
8946 else
8947 step1 = frame->total_size - step2 - libcall_size - multipop_size;
8950 /* Deallocate the first STEP1 bytes of the frame. */
8950 if (known_gt (step1, 0))
8952 /* Emit a barrier to prevent loads from a deallocated stack. */
8953 riscv_emit_stack_tie (hard_frame_pointer_rtx);
8954 need_barrier_p = false;
8956 /* Restore the scalable frame which was allocated in the prologue. */
8957 if (!step1.is_constant ())
8959 poly_int64 scalable_frame = step1;
8960 scalable_frame.coeffs[0] = step1.coeffs[1];
8961 riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame,
8962 true);
8963 step1 -= scalable_frame;
8966 /* Get an rtx for STEP1 that we can add to BASE.
8967 Skip if the adjustment is zero. */
8968 HOST_WIDE_INT step1_value = step1.to_constant ();
8969 if (step1_value != 0)
8971 rtx adjust = GEN_INT (step1_value);
8972 if (SUM_OF_TWO_S12_ALGN (step1_value))
8974 HOST_WIDE_INT one, two;
8975 riscv_split_sum_of_two_s12 (step1_value, &one, &two);
8976 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8977 stack_pointer_rtx,
8978 GEN_INT (one)));
8979 RTX_FRAME_RELATED_P (insn) = 1;
8980 adjust = GEN_INT (two);
8982 else if (!SMALL_OPERAND (step1_value))
8984 riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
8985 adjust = RISCV_PROLOGUE_TEMP (Pmode);
8988 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
8989 stack_pointer_rtx,
8990 adjust));
8991 rtx dwarf = NULL_RTX;
8992 rtx cfa_adjust_rtx
8993 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8994 gen_int_mode (step2 + libcall_size + multipop_size,
8995 Pmode));
8997 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
8998 RTX_FRAME_RELATED_P (insn) = 1;
9000 REG_NOTES (insn) = dwarf;
9003 else if (frame_pointer_needed)
9005 /* Tell riscv_restore_reg to emit DWARF info to redefine the CFA when
9006 restoring the old value of the FP. */
9007 epilogue_cfa_sp_offset = step2;
9010 if (use_multi_pop)
9012 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
9013 if (fmask)
9015 mask_fprs_push = get_multi_push_fpr_mask (frame->multi_push_adj_addi
9016 / UNITS_PER_WORD);
9017 frame->fmask &= ~mask_fprs_push; /* FPRs not saved by cm.push */
9020 else if (use_restore_libcall)
9021 frame->mask = 0; /* Temporarily fib that we need not restore GPRs. */
9023 th_int_mask = th_int_get_mask (frame->mask);
9024 if (th_int_mask && TH_INT_INTERRUPT (cfun))
9026 frame->mask &= ~th_int_mask;
9028 /* RISCV_PROLOGUE_TEMP may be used to handle some CSRs for
9029 interrupts, such as the fcsr. */
9030 if ((TARGET_HARD_FLOAT && frame->fmask)
9031 || (TARGET_ZFINX && frame->mask))
9032 frame->mask |= (1 << RISCV_PROLOGUE_TEMP_REGNUM);
9035 /* Restore the registers. */
9036 riscv_for_each_saved_v_reg (step2, riscv_restore_reg, false);
9037 riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size
9038 - multipop_size,
9039 riscv_restore_reg, true, style == EXCEPTION_RETURN);
9041 if (th_int_mask && TH_INT_INTERRUPT (cfun))
9043 frame->mask = mask; /* Undo the above fib. */
9044 unsigned save_adjustment = th_int_get_save_adjustment ();
9045 gcc_assert (step2.to_constant () >= save_adjustment);
9046 step2 -= save_adjustment;
9049 if (use_restore_libcall)
9050 frame->mask = mask; /* Undo the above fib. */
9052 if (need_barrier_p)
9053 riscv_emit_stack_tie (hard_frame_pointer_rtx);
9055 /* Deallocate the final bit of the frame. */
9056 if (step2.to_constant () > 0)
9058 insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
9059 GEN_INT (step2.to_constant ())));
9061 rtx dwarf = NULL_RTX;
9062 rtx cfa_adjust_rtx
9063 = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9064 GEN_INT (libcall_size + multipop_size));
9065 dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
9066 RTX_FRAME_RELATED_P (insn) = 1;
9068 REG_NOTES (insn) = dwarf;
9071 if (use_multi_pop)
9073 /* Restore the FPRs pushed by cm.push. */
9074 frame->fmask = fmask & mask_fprs_push;
9075 if (frame->fmask)
9076 riscv_for_each_saved_reg (frame->total_size - libcall_size
9077 - multipop_size,
9078 riscv_restore_reg, true,
9079 style == EXCEPTION_RETURN);
9080 /* Undo the above fib. */
9081 frame->mask = mask;
9082 frame->fmask = fmask;
9083 riscv_gen_multi_pop_insn (use_multi_pop_normal, frame->mask,
9084 multipop_size);
9085 if (use_multi_pop_normal)
9086 return;
9088 else if (use_restore_libcall)
9090 rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
9091 insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
9092 RTX_FRAME_RELATED_P (insn) = 1;
9093 REG_NOTES (insn) = dwarf;
9095 emit_jump_insn (gen_gpr_restore_return (ra));
9096 return;
9099 /* Add in the __builtin_eh_return stack adjustment. */
9100 if ((style == EXCEPTION_RETURN) && crtl->calls_eh_return)
9101 emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
9102 EH_RETURN_STACKADJ_RTX));
9104 /* Return from interrupt. */
9105 if (cfun->machine->interrupt_handler_p)
9107 enum riscv_privilege_levels mode = cfun->machine->interrupt_mode;
9109 gcc_assert (mode != UNKNOWN_MODE);
9111 if (th_int_mask && TH_INT_INTERRUPT (cfun))
9112 emit_jump_insn (gen_th_int_pop ());
9113 else if (mode == MACHINE_MODE)
9114 emit_jump_insn (gen_riscv_mret ());
9115 else if (mode == SUPERVISOR_MODE)
9116 emit_jump_insn (gen_riscv_sret ());
9117 else
9118 emit_jump_insn (gen_riscv_uret ());
9120 else if (style != SIBCALL_RETURN)
9121 emit_jump_insn (gen_simple_return_internal (ra));
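/* A worked trace (hypothetical; reuses the earlier 64-byte example frame
   with ra and s0 saved, and no libcall, multi-pop, or vector area):

     step2 = riscv_first_stack_step (...) = 64   whole frame is one
                                                 SMALL_OPERAND adjustment
     step1 = 64 - 64 = 0                         no first addi needed
     ld ra, 56(sp); ld s0, 48(sp)                restore the saved GPRs
     addi sp, sp, 64                             deallocate the frame
     ret                                         simple return via ra  */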
9124 /* Implement EPILOGUE_USES. */
9126 bool
9127 riscv_epilogue_uses (unsigned int regno)
9129 if (regno == RETURN_ADDR_REGNUM)
9130 return true;
9132 if (epilogue_completed && cfun->machine->interrupt_handler_p)
9134 /* An interrupt function restores temp regs, so we must indicate that
9135 they are live at function end. */
9136 if (df_regs_ever_live_p (regno)
9137 || (!crtl->is_leaf && call_used_or_fixed_reg_p (regno)))
9138 return true;
9141 return false;
9144 static bool
9145 riscv_avoid_shrink_wrapping_separate ()
9147 if (riscv_use_save_libcall (&cfun->machine->frame)
9148 || cfun->machine->interrupt_handler_p
9149 || !cfun->machine->frame.gp_sp_offset.is_constant ())
9150 return true;
9152 return false;
9155 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
9157 static sbitmap
9158 riscv_get_separate_components (void)
9160 HOST_WIDE_INT offset;
9161 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
9162 bitmap_clear (components);
9164 if (riscv_avoid_shrink_wrapping_separate ())
9165 return components;
9167 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
9168 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
9169 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
9171 /* We can only wrap registers that have small operand offsets.
9172 For large offsets a pseudo register might be needed which
9173 cannot be created during the shrink wrapping pass. */
9174 if (SMALL_OPERAND (offset))
9175 bitmap_set_bit (components, regno);
9177 offset -= UNITS_PER_WORD;
9180 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
9181 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9182 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
9184 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
9186 /* We can only wrap registers that have small operand offsets.
9187 For large offsets a pseudo register might be needed which
9188 cannot be created during the shrink wrapping pass. */
9189 if (SMALL_OPERAND (offset))
9190 bitmap_set_bit (components, regno);
9192 offset -= GET_MODE_SIZE (mode).to_constant ();
9195 /* Don't mess with the hard frame pointer. */
9196 if (frame_pointer_needed)
9197 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
9199 bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
9201 return components;
9204 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
9206 static sbitmap
9207 riscv_components_for_bb (basic_block bb)
9209 bitmap in = DF_LIVE_IN (bb);
9210 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
9211 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
9213 sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
9214 bitmap_clear (components);
9216 function_abi_aggregator callee_abis;
9217 rtx_insn *insn;
9218 FOR_BB_INSNS (bb, insn)
9219 if (CALL_P (insn))
9220 callee_abis.note_callee_abi (insn_callee_abi (insn));
9221 HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
9223 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
9224 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
9225 if (!fixed_regs[regno]
9226 && !crtl->abi->clobbers_full_reg_p (regno)
9227 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
9228 || bitmap_bit_p (in, regno)
9229 || bitmap_bit_p (gen, regno)
9230 || bitmap_bit_p (kill, regno)))
9231 bitmap_set_bit (components, regno);
9233 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9234 if (!fixed_regs[regno]
9235 && !crtl->abi->clobbers_full_reg_p (regno)
9236 && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
9237 || bitmap_bit_p (in, regno)
9238 || bitmap_bit_p (gen, regno)
9239 || bitmap_bit_p (kill, regno)))
9240 bitmap_set_bit (components, regno);
9242 return components;
9245 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
9247 static void
9248 riscv_disqualify_components (sbitmap, edge, sbitmap, bool)
9250 /* Nothing to do for riscv. */
9253 static void
9254 riscv_process_components (sbitmap components, bool prologue_p)
9256 HOST_WIDE_INT offset;
9257 riscv_save_restore_fn fn = prologue_p ? riscv_save_reg : riscv_restore_reg;
9259 offset = cfun->machine->frame.gp_sp_offset.to_constant ();
9260 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
9261 if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
9263 if (bitmap_bit_p (components, regno))
9264 riscv_save_restore_reg (word_mode, regno, offset, fn);
9266 offset -= UNITS_PER_WORD;
9269 offset = cfun->machine->frame.fp_sp_offset.to_constant ();
9270 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9271 if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
9273 machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
9275 if (bitmap_bit_p (components, regno))
9276 riscv_save_restore_reg (mode, regno, offset, fn);
9278 offset -= GET_MODE_SIZE (mode).to_constant ();
9282 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
9284 static void
9285 riscv_emit_prologue_components (sbitmap components)
9287 riscv_process_components (components, true);
9290 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
9292 static void
9293 riscv_emit_epilogue_components (sbitmap components)
9295 riscv_process_components (components, false);
9298 static void
9299 riscv_set_handled_components (sbitmap components)
9301 for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
9302 if (bitmap_bit_p (components, regno))
9303 cfun->machine->reg_is_wrapped_separately[regno] = true;
9305 for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
9306 if (bitmap_bit_p (components, regno))
9307 cfun->machine->reg_is_wrapped_separately[regno] = true;
9310 /* Return nonzero if this function is known to have a null epilogue.
9311 This allows the optimizer to omit jumps to jumps if no stack
9312 was created. */
9314 bool
9315 riscv_can_use_return_insn (void)
9317 return (reload_completed && known_eq (cfun->machine->frame.total_size, 0)
9318 && ! cfun->machine->interrupt_handler_p);
9321 /* Given that there exists at least one variable that is set (produced)
9322 by OUT_INSN and read (consumed) by IN_INSN, return true iff
9323 IN_INSN represents one or more memory store operations and none of
9324 the variables set by OUT_INSN is used by IN_INSN as the address of a
9325 store operation. If either IN_INSN or OUT_INSN does not represent
9326 a "single" RTL SET expression (as loosely defined by the
9327 implementation of the single_set function) or a PARALLEL with only
9328 SETs, CLOBBERs, and USEs inside, this function returns false.
9330 Borrowed from rs6000, riscv_store_data_bypass_p checks for certain
9331 conditions that result in assertion failures in the generic
9332 store_data_bypass_p function and returns FALSE in such cases.
9334 This is required to make -msave-restore work with the sifive-7
9335 pipeline description. */
9337 bool
9338 riscv_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
9340 rtx out_set, in_set;
9341 rtx out_pat, in_pat;
9342 rtx out_exp, in_exp;
9343 int i, j;
9345 in_set = single_set (in_insn);
9346 if (in_set)
9348 if (MEM_P (SET_DEST (in_set)))
9350 out_set = single_set (out_insn);
9351 if (!out_set)
9353 out_pat = PATTERN (out_insn);
9354 if (GET_CODE (out_pat) == PARALLEL)
9356 for (i = 0; i < XVECLEN (out_pat, 0); i++)
9358 out_exp = XVECEXP (out_pat, 0, i);
9359 if ((GET_CODE (out_exp) == CLOBBER)
9360 || (GET_CODE (out_exp) == USE))
9361 continue;
9362 else if (GET_CODE (out_exp) != SET)
9363 return false;
9369 else
9371 in_pat = PATTERN (in_insn);
9372 if (GET_CODE (in_pat) != PARALLEL)
9373 return false;
9375 for (i = 0; i < XVECLEN (in_pat, 0); i++)
9377 in_exp = XVECEXP (in_pat, 0, i);
9378 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
9379 continue;
9380 else if (GET_CODE (in_exp) != SET)
9381 return false;
9383 if (MEM_P (SET_DEST (in_exp)))
9385 out_set = single_set (out_insn);
9386 if (!out_set)
9388 out_pat = PATTERN (out_insn);
9389 if (GET_CODE (out_pat) != PARALLEL)
9390 return false;
9391 for (j = 0; j < XVECLEN (out_pat, 0); j++)
9393 out_exp = XVECEXP (out_pat, 0, j);
9394 if ((GET_CODE (out_exp) == CLOBBER)
9395 || (GET_CODE (out_exp) == USE))
9396 continue;
9397 else if (GET_CODE (out_exp) != SET)
9398 return false;
9405 return store_data_bypass_p (out_insn, in_insn);
9408 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
9410 When floating-point registers are wider than integer ones, moves between
9411 them must go through memory. */
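/* For example, on RV32D a DFmode value cannot be moved directly between
   an FP register and a pair of GPRs unless Zfa or XTheadFmv provides such
   instructions, so the move is staged through a stack slot. */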
9413 static bool
9414 riscv_secondary_memory_needed (machine_mode mode, reg_class_t class1,
9415 reg_class_t class2)
9417 return (!riscv_v_ext_mode_p (mode)
9418 && GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD
9419 && (class1 == FP_REGS) != (class2 == FP_REGS)
9420 && !TARGET_XTHEADFMV
9421 && !TARGET_ZFA);
9424 /* Implement TARGET_REGISTER_MOVE_COST. */
9426 static int
9427 riscv_register_move_cost (machine_mode mode,
9428 reg_class_t from, reg_class_t to)
9430 if ((from == FP_REGS && to == GR_REGS)
9431 || (from == GR_REGS && to == FP_REGS))
9432 return tune_param->fmv_cost;
9434 return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
9437 /* Implement TARGET_HARD_REGNO_NREGS. */
9439 static unsigned int
9440 riscv_hard_regno_nregs (unsigned int regno, machine_mode mode)
9442 if (riscv_v_ext_vector_mode_p (mode))
9444 /* Handle fractional LMUL: it occupies only part of a vector register but
9445 still needs a whole vector register to hold it. */
9446 if (maybe_lt (GET_MODE_SIZE (mode), UNITS_PER_V_REG))
9447 return 1;
9449 return exact_div (GET_MODE_SIZE (mode), UNITS_PER_V_REG).to_constant ();
9452 /* For tuple modes, the number of registers = NF * LMUL. */
9453 if (riscv_v_ext_tuple_mode_p (mode))
9455 unsigned int nf = riscv_vector::get_nf (mode);
9456 machine_mode subpart_mode = riscv_vector::get_subpart_mode (mode);
9457 poly_int64 size = GET_MODE_SIZE (subpart_mode);
9458 gcc_assert (known_eq (size * nf, GET_MODE_SIZE (mode)));
9459 if (maybe_lt (size, UNITS_PER_V_REG))
9460 return nf;
9461 else
9463 unsigned int lmul = exact_div (size, UNITS_PER_V_REG).to_constant ();
9464 return nf * lmul;
9468 /* For VLS modes, we allocate registers according to TARGET_MIN_VLEN. */
9469 if (riscv_v_ext_vls_mode_p (mode))
9471 int size = GET_MODE_BITSIZE (mode).to_constant ();
9472 if (size < TARGET_MIN_VLEN)
9473 return 1;
9474 else
9475 return size / TARGET_MIN_VLEN;
9478 /* Modes for VL or VTYPE are just markers, not holding values,
9479 so they always consume one register. */
9480 if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
9481 || FRM_REG_P (regno))
9482 return 1;
9484 /* Assume every valid non-vector mode fits in one vector register. */
9485 if (V_REG_P (regno))
9486 return 1;
9488 if (FP_REG_P (regno))
9489 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
9491 /* All other registers are word-sized. */
9492 return (GET_MODE_SIZE (mode).to_constant () + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
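/* Examples: an LMUL-2 vector mode occupies two vector registers, and on
   RV64 a TImode value needs (16 + 8 - 1) / 8 = 2 general registers. */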
9495 /* Implement TARGET_HARD_REGNO_MODE_OK. */
9497 static bool
9498 riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9500 unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
9502 if (GP_REG_P (regno))
9504 if (riscv_v_ext_mode_p (mode))
9505 return false;
9507 if (!GP_REG_P (regno + nregs - 1))
9508 return false;
9510 else if (FP_REG_P (regno))
9512 if (riscv_v_ext_mode_p (mode))
9513 return false;
9515 if (!FP_REG_P (regno + nregs - 1))
9516 return false;
9518 if (GET_MODE_CLASS (mode) != MODE_FLOAT
9519 && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
9520 return false;
9522 /* Only use callee-saved registers if a potential callee is guaranteed
9523 to spill the requisite width. */
9524 if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
9525 || (!call_used_or_fixed_reg_p (regno)
9526 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
9527 return false;
9529 else if (V_REG_P (regno))
9531 if (!riscv_v_ext_mode_p (mode))
9532 return false;
9534 if (!V_REG_P (regno + nregs - 1))
9535 return false;
9537 int regno_alignment = riscv_get_v_regno_alignment (mode);
9538 if (regno_alignment != 1)
9539 return ((regno % regno_alignment) == 0);
9541 else if (VTYPE_REG_P (regno) || VL_REG_P (regno) || VXRM_REG_P (regno)
9542 || FRM_REG_P (regno))
9543 return true;
9544 else
9545 return false;
9547 /* Require same callee-savedness for all registers. */
9548 for (unsigned i = 1; i < nregs; i++)
9549 if (call_used_or_fixed_reg_p (regno)
9550 != call_used_or_fixed_reg_p (regno + i))
9551 return false;
9553 /* Only use even registers in RV32 ZDINX. */
9554 if (!TARGET_64BIT && TARGET_ZDINX
9555 && GET_MODE_CLASS (mode) == MODE_FLOAT
9556 && GET_MODE_UNIT_SIZE (mode) == GET_MODE_SIZE (DFmode))
9557 return !(regno & 1);
9560 return true;
9563 /* Implement TARGET_MODES_TIEABLE_P.
9565 Don't allow floating-point modes to be tied, since type punning of
9566 single-precision and double-precision is implementation defined. */
9568 static bool
9569 riscv_modes_tieable_p (machine_mode mode1, machine_mode mode2)
9571 /* We don't allow modes in different REG_CLASSes to be tied, since that
9572 would cause an ICE in register allocation (RA).
9573 E.g. V2SI and DI are not tieable. */
9574 if (riscv_v_ext_mode_p (mode1) != riscv_v_ext_mode_p (mode2))
9575 return false;
9576 return (mode1 == mode2
9577 || !(GET_MODE_CLASS (mode1) == MODE_FLOAT
9578 && GET_MODE_CLASS (mode2) == MODE_FLOAT));
9581 /* Implement TARGET_CLASS_MAX_NREGS. */
9583 static unsigned char
9584 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
9586 if (reg_class_subset_p (rclass, FP_REGS))
9587 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
9589 if (reg_class_subset_p (rclass, GR_REGS))
9590 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
9592 if (reg_class_subset_p (rclass, V_REGS))
9593 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
9595 return 0;
9598 /* Implement TARGET_MEMORY_MOVE_COST. */
9600 static int
9601 riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
9603 return (tune_param->memory_cost
9604 + memory_move_secondary_cost (mode, rclass, in));
9607 /* Return the number of instructions that can be issued per cycle. */
9609 static int
9610 riscv_issue_rate (void)
9612 return tune_param->issue_rate;
9615 /* Implement TARGET_SCHED_VARIABLE_ISSUE. */
9616 static int
9617 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
9619 if (DEBUG_INSN_P (insn))
9620 return more;
9622 rtx_code code = GET_CODE (PATTERN (insn));
9623 if (code == USE || code == CLOBBER)
9624 return more;
9626 /* GHOST insns are used for blockage and similar cases which
9627 effectively end a cycle. */
9628 if (get_attr_type (insn) == TYPE_GHOST)
9629 return 0;
9631 /* If we ever encounter an insn with an unknown type, trip
9632 an assert so we can find and fix this problem. */
9633 gcc_assert (get_attr_type (insn) != TYPE_UNKNOWN);
9635 /* If we ever encounter an insn without an insn reservation, trip
9636 an assert so we can find and fix this problem. */
9637 gcc_assert (insn_has_dfa_reservation_p (insn));
9639 return more - 1;
9642 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
9643 instruction fusion of some sort. */
9645 static bool
9646 riscv_macro_fusion_p (void)
9648 return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
9651 /* Return true iff the instruction fusion described by OP is enabled. */
9653 static bool
9654 riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
9656 return tune_param->fusible_ops & op;
9659 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
9660 should be kept together during scheduling. */
9662 static bool
9663 riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
9665 rtx prev_set = single_set (prev);
9666 rtx curr_set = single_set (curr);
9667 /* prev and curr are simple SET insns, i.e. no flag setting or branching. */
9668 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
9670 if (!riscv_macro_fusion_p ())
9671 return false;
9673 if (simple_sets_p
9674 && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
9675 || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
9677 /* We are trying to match the following:
9678 prev (slli) == (set (reg:DI rD)
9679 (ashift:DI (reg:DI rS) (const_int 32)))
9680 curr (srli) == (set (reg:DI rD)
9681 (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
9682 with <shift> being either 32 for FUSE_ZEXTW, or
9683 less than 32 for FUSE_ZEXTWS. */
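/* I.e. the fused pair is "slli rD,rS,32" followed by "srli rD,rD,32",
   which zero-extends the low 32 bits of rS into rD. */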
9685 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
9686 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
9687 && REG_P (SET_DEST (prev_set))
9688 && REG_P (SET_DEST (curr_set))
9689 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
9690 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
9691 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
9692 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9693 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
9694 && ((INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
9695 && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW))
9696 || (INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
9697 && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))))
9698 return true;
9701 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
9703 /* We are trying to match the following:
9704 prev (slli) == (set (reg:DI rD)
9705 (ashift:DI (reg:DI rS) (const_int 48)))
9706 curr (srli) == (set (reg:DI rD)
9707 (lshiftrt:DI (reg:DI rD) (const_int 48))) */
9709 if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
9710 && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
9711 && REG_P (SET_DEST (prev_set))
9712 && REG_P (SET_DEST (curr_set))
9713 && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
9714 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (curr_set))
9715 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
9716 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9717 && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
9718 && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
9719 return true;
9722 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
9724 /* We are trying to match the following:
9725 prev (add) == (set (reg:DI rD)
9726 (plus:DI (reg:DI rS1) (reg:DI rS2)))
9727 curr (ld) == (set (reg:DI rD)
9728 (mem:DI (reg:DI rD))) */
9730 if (MEM_P (SET_SRC (curr_set))
9731 && REG_P (XEXP (SET_SRC (curr_set), 0))
9732 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
9733 && GET_CODE (SET_SRC (prev_set)) == PLUS
9734 && REG_P (XEXP (SET_SRC (prev_set), 0))
9735 && REG_P (XEXP (SET_SRC (prev_set), 1)))
9736 return true;
9738 /* We are trying to match the following:
9739 prev (add) == (set (reg:DI rD)
9740 (plus:DI (reg:DI rS1) (reg:DI rS2)))
9741 curr (lw) == (set (reg:DI rD) (any_extend:DI (mem:SUBX (reg:DI rD)))) */
9743 if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
9744 || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
9745 && MEM_P (XEXP (SET_SRC (curr_set), 0))
9746 && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
9747 && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
9748 && GET_CODE (SET_SRC (prev_set)) == PLUS
9749 && REG_P (XEXP (SET_SRC (prev_set), 0))
9750 && REG_P (XEXP (SET_SRC (prev_set), 1)))
9751 return true;
9754 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
9756 /* We are trying to match the following:
9757 prev (add) == (set (reg:DI rS)
9758 (plus:DI (reg:DI rS) (const_int)))
9759 curr (ld) == (set (reg:DI rD)
9760 (mem:DI (reg:DI rS))) */
9762 if (MEM_P (SET_SRC (curr_set))
9763 && REG_P (XEXP (SET_SRC (curr_set), 0))
9764 && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
9765 && GET_CODE (SET_SRC (prev_set)) == PLUS
9766 && REG_P (XEXP (SET_SRC (prev_set), 0))
9767 && CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
9768 return true;
9771 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
9773 /* We are trying to match the following:
9774 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
9775 curr (addi) == (set (reg:DI rD)
9776 (plus:DI (reg:DI rD) (const_int IMM12))) */
9778 if ((GET_CODE (SET_SRC (curr_set)) == LO_SUM
9779 || (GET_CODE (SET_SRC (curr_set)) == PLUS
9780 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9781 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))))
9782 && (GET_CODE (SET_SRC (prev_set)) == HIGH
9783 || (CONST_INT_P (SET_SRC (prev_set))
9784 && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
9785 return true;
9788 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
9790 /* We are trying to match the following:
9791 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9792 curr (addi) == (set (reg:DI rD)
9793 (plus:DI (reg:DI rD) (const_int IMM12)))
9795 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9796 curr (addi) == (set (reg:DI rD)
9797 (lo_sum:DI (reg:DI rD) (const_int IMM12))) */
9799 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
9800 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
9801 && (GET_CODE (SET_SRC (curr_set)) == LO_SUM
9802 || (GET_CODE (SET_SRC (curr_set)) == PLUS
9803 && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
9804 && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
9806 return true;
9809 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
9811 /* We are trying to match the following:
9812 prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
9813 curr (ld) == (set (reg:DI rD)
9814 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
9816 if (CONST_INT_P (SET_SRC (prev_set))
9817 && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
9818 && MEM_P (SET_SRC (curr_set))
9819 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
9820 return true;
9822 if (GET_CODE (SET_SRC (prev_set)) == HIGH
9823 && MEM_P (SET_SRC (curr_set))
9824 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
9825 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
9826 return true;
9828 if (GET_CODE (SET_SRC (prev_set)) == HIGH
9829 && (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
9830 || GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
9831 && MEM_P (XEXP (SET_SRC (curr_set), 0))
9832 && (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
9833 && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
9834 return true;
9837 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
9839 /* We are trying to match the following:
9840 prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
9841 curr (ld) == (set (reg:DI rD)
9842 (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
9844 if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
9845 && XINT (SET_SRC (prev_set), 1) == UNSPEC_AUIPC
9846 && MEM_P (SET_SRC (curr_set))
9847 && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
9848 return true;
9851 if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
9853 /* We are trying to match the following:
9854 prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
9855 (reg rS1))
9856 curr (sd) == (set (mem (plus (reg sp|fp) (const_int)))
9857 (reg rS2)) */
9859 if (MEM_P (SET_DEST (prev_set))
9860 && MEM_P (SET_DEST (curr_set))
9861 /* We can probably relax this condition. The documentation is a bit
9862 unclear about sub-word cases. So we just model DImode for now. */
9863 && GET_MODE (SET_DEST (curr_set)) == DImode
9864 && GET_MODE (SET_DEST (prev_set)) == DImode)
9866 rtx base_prev, base_curr, offset_prev, offset_curr;
9868 extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
9869 extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
9871 /* Fail if we did not find both bases. */
9872 if (base_prev == NULL_RTX || base_curr == NULL_RTX)
9873 return false;
9875 /* Fail if either base is not a register. */
9876 if (!REG_P (base_prev) || !REG_P (base_curr))
9877 return false;
9879 /* Fail if the bases are not the same register. */
9880 if (REGNO (base_prev) != REGNO (base_curr))
9881 return false;
9883 /* Originally the thought was to check MEM_ALIGN, but that was
9884 reporting incorrect alignments, even for SP/FP accesses, so we
9885 gave up on that approach. Instead just check for stack/hfp
9886 which we know are aligned. */
9887 if (REGNO (base_prev) != STACK_POINTER_REGNUM
9888 && REGNO (base_prev) != HARD_FRAME_POINTER_REGNUM)
9889 return false;
9891 /* The two stores must be contained within opposite halves of the
9892 same 16 byte aligned block of memory. We know that the stack
9893 pointer and the frame pointer have suitable alignment. So we
9894 just need to check the offsets of the two stores for suitable
9895 alignment. */
9896 /* Get the smaller offset into OFFSET_PREV. */
9897 if (INTVAL (offset_prev) > INTVAL (offset_curr))
9898 std::swap (offset_prev, offset_curr);
9900 /* If the smaller offset (OFFSET_PREV) is not 16 byte aligned,
9901 then fail. */
9902 if ((INTVAL (offset_prev) % 16) != 0)
9903 return false;
9905 /* The higher offset must be 8 bytes more than the lower
9906 offset. */
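/* For example, stores at sp+16 and sp+24 pass these checks and may be
   fused, while stores at sp+8 and sp+16 are rejected because the lower
   offset is not 16-byte aligned. */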
9907 return (INTVAL (offset_prev) + 8 == INTVAL (offset_curr));
9911 return false;
9914 /* Adjust the cost/latency of instructions for scheduling.
9915 For now this is just used to change the latency of vector instructions
9916 according to their LMUL. We assume that an insn with LMUL == 8 requires
9917 eight times more execution cycles than the same insn with LMUL == 1.
9918 As this may cause very high latencies which lead to scheduling artifacts
9919 we currently only perform the adjustment when -madjust-lmul-cost is given. */
9921 static int
9922 riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
9923 unsigned int)
9925 /* Only do adjustments for the generic out-of-order scheduling model. */
9926 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
9927 return cost;
9929 if (recog_memoized (insn) < 0)
9930 return cost;
9932 enum attr_type type = get_attr_type (insn);
9934 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
9936 /* TODO: For ordered reductions scale the base cost relative to the
9937 number of units. */
9941 /* Don't do any LMUL-based latency adjustment unless explicitly asked to. */
9942 if (!TARGET_ADJUST_LMUL_COST)
9943 return cost;
9945 /* vsetvl has a vlmul attribute but its latency does not depend on it. */
9946 if (type == TYPE_VSETVL || type == TYPE_VSETVL_PRE)
9947 return cost;
9949 enum riscv_vector::vlmul_type lmul =
9950 (riscv_vector::vlmul_type)get_attr_vlmul (insn);
9952 double factor = 1;
9953 switch (lmul)
9955 case riscv_vector::LMUL_2:
9956 factor = 2;
9957 break;
9958 case riscv_vector::LMUL_4:
9959 factor = 4;
9960 break;
9961 case riscv_vector::LMUL_8:
9962 factor = 8;
9963 break;
9964 case riscv_vector::LMUL_F2:
9965 factor = 0.5;
9966 break;
9967 case riscv_vector::LMUL_F4:
9968 factor = 0.25;
9969 break;
9970 case riscv_vector::LMUL_F8:
9971 factor = 0.125;
9972 break;
9973 default:
9974 factor = 1;
9977 /* If the latency was nonzero, keep it that way. */
9978 int new_cost = MAX (cost > 0 ? 1 : 0, cost * factor);
9980 return new_cost;
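/* Example: a base latency of 4 becomes 32 for LMUL_8 and 1 for LMUL_F4;
   the MAX above keeps a nonzero latency from rounding down to zero. */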
9983 /* Auxiliary function to emit RISC-V ELF attribute. */
9984 static void
9985 riscv_emit_attribute ()
9987 fprintf (asm_out_file, "\t.attribute arch, \"%s\"\n",
9988 riscv_arch_str ().c_str ());
9990 fprintf (asm_out_file, "\t.attribute unaligned_access, %d\n",
9991 TARGET_STRICT_ALIGN ? 0 : 1);
9993 fprintf (asm_out_file, "\t.attribute stack_align, %d\n",
9994 riscv_stack_boundary / 8);
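/* A typical output sketch (the exact arch string depends on -march):
     .attribute arch, "rv64i2p1_m2p0"
     .attribute unaligned_access, 0
     .attribute stack_align, 16  */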
9997 /* Output .variant_cc for a function symbol that follows the vector calling
9998 convention. */
10000 static void
10001 riscv_asm_output_variant_cc (FILE *stream, const tree decl, const char *name)
10003 if (TREE_CODE (decl) == FUNCTION_DECL)
10005 riscv_cc cc = (riscv_cc) fndecl_abi (decl).id ();
10006 if (cc == RISCV_CC_V)
10008 fprintf (stream, "\t.variant_cc\t");
10009 assemble_name (stream, name);
10010 fprintf (stream, "\n");
10015 /* Implement ASM_DECLARE_FUNCTION_NAME. */
10017 void
10018 riscv_declare_function_name (FILE *stream, const char *name, tree fndecl)
10020 riscv_asm_output_variant_cc (stream, fndecl, name);
10021 ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
10022 ASM_OUTPUT_FUNCTION_LABEL (stream, name, fndecl);
10023 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
10025 fprintf (stream, "\t.option push\n");
10027 struct cl_target_option *local_cl_target =
10028 TREE_TARGET_OPTION (DECL_FUNCTION_SPECIFIC_TARGET (fndecl));
10029 struct cl_target_option *global_cl_target =
10030 TREE_TARGET_OPTION (target_option_default_node);
10032 const char *local_arch_str = get_arch_str (local_cl_target);
10033 const char *arch_str = local_arch_str != NULL
10034 ? local_arch_str
10035 : riscv_arch_str (true).c_str ();
10036 fprintf (stream, "\t.option arch, %s\n", arch_str);
10037 const char *local_tune_str = get_tune_str (local_cl_target);
10038 const char *global_tune_str = get_tune_str (global_cl_target);
10039 if (strcmp (local_tune_str, global_tune_str) != 0)
10040 fprintf (stream, "\t# tune = %s\n", local_tune_str);
10044 void
10045 riscv_declare_function_size (FILE *stream, const char *name, tree fndecl)
10047 if (!flag_inhibit_size_directive)
10048 ASM_OUTPUT_MEASURED_SIZE (stream, name);
10050 if (DECL_FUNCTION_SPECIFIC_TARGET (fndecl))
10052 fprintf (stream, "\t.option pop\n");
10056 /* Implement ASM_OUTPUT_DEF_FROM_DECLS. */
10058 void
10059 riscv_asm_output_alias (FILE *stream, const tree decl, const tree target)
10061 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
10062 const char *value = IDENTIFIER_POINTER (target);
10063 riscv_asm_output_variant_cc (stream, decl, name);
10064 ASM_OUTPUT_DEF (stream, name, value);
10067 /* Implement ASM_OUTPUT_EXTERNAL. */
10069 void
10070 riscv_asm_output_external (FILE *stream, tree decl, const char *name)
10072 default_elf_asm_output_external (stream, decl, name);
10073 riscv_asm_output_variant_cc (stream, decl, name);
10076 /* Implement TARGET_ASM_FILE_START. */
10078 static void
10079 riscv_file_start (void)
10081 default_file_start ();
10083 /* Instruct GAS to generate position-[in]dependent code. */
10084 fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
10086 /* If the user specifies "-mno-relax" on the command line then disable linker
10087 relaxation in the assembler. */
10088 if (! riscv_mrelax)
10089 fprintf (asm_out_file, "\t.option norelax\n");
10091 /* If the user specifies "-mcsr-check" on the command line then enable csr
10092 check in the assembler. */
10093 if (riscv_mcsr_check)
10094 fprintf (asm_out_file, "\t.option csr-check\n");
10096 if (riscv_emit_attribute_p)
10097 riscv_emit_attribute ();
10100 /* Implement TARGET_ASM_OUTPUT_MI_THUNK. Generate rtl rather than asm text
10101 in order to avoid duplicating too much logic from elsewhere. */
10103 static void
10104 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10105 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10106 tree function)
10108 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
10109 rtx this_rtx, temp1, temp2, fnaddr;
10110 rtx_insn *insn;
10112 riscv_in_thunk_func = true;
10114 /* Pretend to be a post-reload pass while generating rtl. */
10115 reload_completed = 1;
10117 /* Mark the end of the (empty) prologue. */
10118 emit_note (NOTE_INSN_PROLOGUE_END);
10120 /* Determine if we can use a sibcall to call FUNCTION directly. */
10121 fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
10123 /* We need two temporary registers in some cases. */
10124 temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
10125 temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
10127 /* Find out which register contains the "this" pointer. */
10128 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10129 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
10130 else
10131 this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
10133 /* Add DELTA to THIS_RTX. */
10134 if (delta != 0)
10136 rtx offset = GEN_INT (delta);
10137 if (!SMALL_OPERAND (delta))
10139 riscv_emit_move (temp1, offset);
10140 offset = temp1;
10142 emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
10145 /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
10146 if (vcall_offset != 0)
10148 rtx addr;
10150 /* Set TEMP1 to *THIS_RTX. */
10151 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
10153 /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET. */
10154 addr = riscv_add_offset (temp2, temp1, vcall_offset);
10156 /* Load the offset and add it to THIS_RTX. */
10157 riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
10158 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
10161 /* Jump to the target function. */
10162 rtx callee_cc = gen_int_mode (fndecl_abi (function).id (), SImode);
10163 insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, callee_cc));
10164 SIBLING_CALL_P (insn) = 1;
10166 /* Run just enough of rest_of_compilation. This sequence was
10167 "borrowed" from alpha.cc. */
10168 insn = get_insns ();
10169 split_all_insns_noflow ();
10170 shorten_branches (insn);
10171 assemble_start_function (thunk_fndecl, fnname);
10172 final_start_function (insn, file, 1);
10173 final (insn, file, 1);
10174 final_end_function ();
10175 assemble_end_function (thunk_fndecl, fnname);
10177 /* Clean up the vars set above. Note that final_end_function resets
10178 the global pointer for us. */
10179 reload_completed = 0;
10180 riscv_in_thunk_func = false;
10183 /* Allocate a chunk of memory for per-function machine-dependent data. */
10185 static struct machine_function *
10186 riscv_init_machine_status (void)
10188 return ggc_cleared_alloc<machine_function> ();
10191 /* Return the VLEN value associated with -march and -mrvv-vector-bits.
10192 TODO: So far we only support the length-agnostic value. */
10193 static poly_uint16
10194 riscv_convert_vector_chunks (struct gcc_options *opts)
10196 int chunk_num;
10197 int min_vlen = TARGET_MIN_VLEN_OPTS (opts);
10198 if (min_vlen > 32)
10200 /* When targeting minimum VLEN > 32, we should use a 64-bit chunk size.
10201 Otherwise we cannot include SEW = 64 bits.
10202 Runtime invariant: The single indeterminate represents the
10203 number of 64-bit chunks in a vector beyond the minimum length of 64 bits.
10204 Thus the number of bytes in a vector is 8 + 8 * x1 which is
10205 riscv_vector_chunks * 8 = poly_int (8, 8). */
10206 riscv_bytes_per_vector_chunk = 8;
10207 /* Adjust BYTES_PER_RISCV_VECTOR according to TARGET_MIN_VLEN:
10208 - TARGET_MIN_VLEN = 64bit: [8,8]
10209 - TARGET_MIN_VLEN = 128bit: [16,16]
10210 - TARGET_MIN_VLEN = 256bit: [32,32]
10211 - TARGET_MIN_VLEN = 512bit: [64,64]
10212 - TARGET_MIN_VLEN = 1024bit: [128,128]
10213 - TARGET_MIN_VLEN = 2048bit: [256,256]
10214 - TARGET_MIN_VLEN = 4096bit: [512,512]
10215 FIXME: We currently DON'T support TARGET_MIN_VLEN > 4096bit. */
10216 chunk_num = min_vlen / 64;
10218 else
10220 /* When targeting minimum VLEN = 32, we should use a 32-bit
10221 chunk size. Runtime invariant: The single indeterminate represents the
10222 number of 32-bit chunks in a vector beyond the minimum length of 32 bits.
10223 Thus the number of bytes in a vector is 4 + 4 * x1 which is
10224 riscv_vector_chunks * 4 = poly_int (4, 4). */
10225 riscv_bytes_per_vector_chunk = 4;
10226 chunk_num = 1;
10229 /* Set riscv_vector_chunks as poly (1, 1) run-time constant if TARGET_VECTOR
10230 is enabled. Set riscv_vector_chunks as 1 compile-time constant if
10231 TARGET_VECTOR is disabled. riscv_vector_chunks is used in "riscv-modes.def"
10232 to set RVV mode size. The RVV machine modes size are run-time constant if
10233 TARGET_VECTOR is enabled. The RVV machine modes size remains default
10234 compile-time constant if TARGET_VECTOR is disabled. */
10235 if (TARGET_VECTOR_OPTS_P (opts))
10237 switch (opts->x_rvv_vector_bits)
10239 case RVV_VECTOR_BITS_SCALABLE:
10240 return poly_uint16 (chunk_num, chunk_num);
10241 case RVV_VECTOR_BITS_ZVL:
10242 return (int) min_vlen / (riscv_bytes_per_vector_chunk * 8);
10243 default:
10244 gcc_unreachable ();
10247 else
10248 return 1;
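/* Example: -march=rv64gcv gives TARGET_MIN_VLEN = 128, so chunk_num = 2
   and each chunk is 8 bytes; with -mrvv-vector-bits=scalable this
   returns poly (2, 2), i.e. BYTES_PER_RISCV_VECTOR = [16, 16]. */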
10251 /* 'Unpack' the internal tuning structs and update the options
10252 in OPTS. The caller must have set up selected_tune and selected_arch
10253 as all the other target-specific codegen decisions are
10254 derived from them. */
10255 void
10256 riscv_override_options_internal (struct gcc_options *opts)
10258 const struct riscv_tune_info *cpu;
10260 /* The presence of the M extension implies that division instructions
10261 are present, so include them unless explicitly disabled. */
10262 if (TARGET_MUL_OPTS_P (opts) && (target_flags_explicit & MASK_DIV) == 0)
10263 opts->x_target_flags |= MASK_DIV;
10264 else if (!TARGET_MUL_OPTS_P (opts) && TARGET_DIV_OPTS_P (opts))
10265 error ("%<-mdiv%> requires %<-march%> to subsume the %<M%> extension");
10267 /* We might use a multiplication to calculate the scalable vector length at
10268 runtime. Therefore, require the M extension. */
10269 if (TARGET_VECTOR && !TARGET_MUL)
10270 sorry ("Currently the %<V%> implementation requires the %<M%> extension");
10272 /* Likewise floating-point division and square root. */
10273 if ((TARGET_HARD_FLOAT_OPTS_P (opts) || TARGET_ZFINX_OPTS_P (opts))
10274 && ((target_flags_explicit & MASK_FDIV) == 0))
10275 opts->x_target_flags |= MASK_FDIV;
10277 /* Handle -mtune: use -mcpu if -mtune is not given, and use the default
10278 -mtune if neither -mtune nor -mcpu is given. */
10279 const char *tune_string = get_tune_str (opts);
10280 cpu = riscv_parse_tune (tune_string, false);
10281 riscv_microarchitecture = cpu->microarchitecture;
10282 tune_param = opts->x_optimize_size
10283 ? &optimize_size_tune_info
10284 : cpu->tune_param;
10286 /* Use -mtune's setting for slow_unaligned_access, even when optimizing
10287 for size. For architectures that trap and emulate unaligned accesses,
10288 the performance cost is too great, even for -Os. Similarly, if
10289 -m[no-]strict-align is left unspecified, heed -mtune's advice. */
10290 riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access
10291 || TARGET_STRICT_ALIGN);
10293 /* By default, when -mno-vector-strict-align is not specified, do not allow
10294 unaligned vector memory accesses except if -mtune's setting explicitly
10295 allows it. */
10296 riscv_vector_unaligned_access_p = opts->x_rvv_vector_strict_align == 0
10297 || cpu->tune_param->vector_unaligned_access;
10299 /* Make a note if user explicitly passed -mstrict-align for later
10300 builtin macro generation. Can't use target_flags_explicitly since
10301 it is set even for -mno-strict-align. */
10302 riscv_user_wants_strict_align = TARGET_STRICT_ALIGN_OPTS_P (opts);
10304 if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
10305 && cpu->tune_param->slow_unaligned_access)
10306 opts->x_target_flags |= MASK_STRICT_ALIGN;
10308 /* If the user hasn't specified a branch cost, use the processor's
10309 default. */
10310 if (opts->x_riscv_branch_cost == 0)
10311 opts->x_riscv_branch_cost = tune_param->branch_cost;
10313 /* FIXME: We don't allow TARGET_MIN_VLEN > 4096 since the datatypes of
10314 both GET_MODE_SIZE and GET_MODE_BITSIZE are poly_uint16.
10316 We can only allow TARGET_MIN_VLEN * 8 (LMUL) < 65535. */
10317 if (TARGET_MIN_VLEN_OPTS (opts) > 4096)
10318 sorry ("Current RISC-V GCC does not support VLEN greater than 4096bit for "
10319 "'V' Extension");
10321 /* FIXME: We don't support RVV in big-endian for now, we may enable RVV with
10322 big-endian after finishing full coverage testing. */
10323 if (TARGET_VECTOR && TARGET_BIG_ENDIAN)
10324 sorry ("Current RISC-V GCC does not support RVV in big-endian mode");
10326 /* Convert -march and -mrvv-vector-bits to a chunks count. */
10327 riscv_vector_chunks = riscv_convert_vector_chunks (opts);
10330 /* Implement TARGET_OPTION_OVERRIDE. */
10332 void
10333 riscv_option_override (void)
10335 #ifdef SUBTARGET_OVERRIDE_OPTIONS
10336 SUBTARGET_OVERRIDE_OPTIONS;
10337 #endif
10339 flag_pcc_struct_return = 0;
10341 if (flag_pic)
10342 g_switch_value = 0;
10344 /* Always prefer medlow over medany for RV32, since medlow can access the
10345 full address space. */
10346 if (riscv_cmodel == CM_LARGE && !TARGET_64BIT)
10347 riscv_cmodel = CM_MEDLOW;
10349 if (riscv_cmodel == CM_LARGE && TARGET_EXPLICIT_RELOCS)
10350 sorry ("code model %qs with %qs", "large", "-mexplicit-relocs");
10352 if (riscv_cmodel == CM_LARGE && flag_pic)
10353 sorry ("code model %qs with %qs", "large",
10354 global_options.x_flag_pic > 1 ? "-fPIC" : "-fpic");
10356 if (flag_pic)
10357 riscv_cmodel = CM_PIC;
10359 /* With -fno-omit-frame-pointer combined with -momit-leaf-frame-pointer,
10360 we need to save fp with ra for non-leaf functions but neither for leaf
10361 functions. The x_flag_omit_frame_pointer flag has first priority in
10362 determining whether the frame pointer is needed. If we did not
10363 override it, fp and ra would also be stored for leaf functions, which
10364 is not what we want. */
10365 riscv_save_frame_pointer = false;
10366 if (TARGET_OMIT_LEAF_FRAME_POINTER_P (global_options.x_target_flags))
10368 if (!global_options.x_flag_omit_frame_pointer)
10369 riscv_save_frame_pointer = true;
10371 global_options.x_flag_omit_frame_pointer = 1;
10374 /* We get better code with explicit relocs for CM_MEDLOW, but
10375 worse code for the others (for now). Pick the best default. */
10376 if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
10377 if (riscv_cmodel == CM_MEDLOW)
10378 target_flags |= MASK_EXPLICIT_RELOCS;
10380 /* Require that the ISA supports the requested floating-point ABI. */
10381 if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
10382 error ("requested ABI requires %<-march%> to subsume the %qc extension",
10383 UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
10385 /* RVE requires a specific ABI. */
10386 if (TARGET_RVE)
10388 if (!TARGET_64BIT && riscv_abi != ABI_ILP32E)
10389 error ("rv32e requires ilp32e ABI");
10390 else if (TARGET_64BIT && riscv_abi != ABI_LP64E)
10391 error ("rv64e requires lp64e ABI");
10394 /* ILP32E does not support the 'd' extension. */
10395 if (riscv_abi == ABI_ILP32E && UNITS_PER_FP_REG > 4)
10396 error ("ILP32E ABI does not support the %qc extension",
10397 UNITS_PER_FP_REG > 8 ? 'Q' : 'D');
10399 if (riscv_abi == ABI_LP64E)
10401 if (warning (OPT_Wdeprecated, "LP64E ABI is marked for deprecation in GCC"))
10402 inform (UNKNOWN_LOCATION, "if you need LP64E please notify the GCC "
10403 "project via %{PR116152%}", "https://gcc.gnu.org/PR116152");
10406 /* Zfinx requires the ilp32, ilp32e, lp64 or lp64e ABI. */
10407 if (TARGET_ZFINX
10408 && riscv_abi != ABI_ILP32 && riscv_abi != ABI_LP64
10409 && riscv_abi != ABI_ILP32E && riscv_abi != ABI_LP64E)
10410 error ("z*inx requires ABI ilp32, ilp32e, lp64 or lp64e");
10412 /* We do not yet support ILP32 on RV64. */
10413 if (BITS_PER_WORD != POINTER_SIZE)
10414 error ("ABI requires %<-march=rv%d%>", POINTER_SIZE);
10416 /* Validate -mpreferred-stack-boundary= value. */
10417 riscv_stack_boundary = ABI_STACK_BOUNDARY;
10418 if (riscv_preferred_stack_boundary_arg)
10420 int min = ctz_hwi (STACK_BOUNDARY / 8);
10421 int max = 8;
10423 if (!IN_RANGE (riscv_preferred_stack_boundary_arg, min, max))
10424 error ("%<-mpreferred-stack-boundary=%d%> must be between %d and %d",
10425 riscv_preferred_stack_boundary_arg, min, max);
10427 riscv_stack_boundary = 8 << riscv_preferred_stack_boundary_arg;
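/* E.g. -mpreferred-stack-boundary=4 yields riscv_stack_boundary
   = 8 << 4 = 128 bits, i.e. a 16-byte stack alignment. */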
10430 if (riscv_emit_attribute_p < 0)
10431 #ifdef HAVE_AS_RISCV_ATTRIBUTE
10432 riscv_emit_attribute_p = TARGET_RISCV_ATTRIBUTE;
10433 #else
10434 riscv_emit_attribute_p = 0;
10436 if (riscv_emit_attribute_p)
10437 error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32"
10438 " [%<-mriscv-attribute%>]");
10439 #endif
10441 if (riscv_stack_protector_guard == SSP_GLOBAL
10442 && OPTION_SET_P (riscv_stack_protector_guard_offset_str))
10444 error ("incompatible options %<-mstack-protector-guard=global%> and "
10445 "%<-mstack-protector-guard-offset=%s%>",
10446 riscv_stack_protector_guard_offset_str);
10449 if (riscv_stack_protector_guard == SSP_TLS
10450 && !(OPTION_SET_P (riscv_stack_protector_guard_offset_str)
10451 && OPTION_SET_P (riscv_stack_protector_guard_reg_str)))
10453 error ("both %<-mstack-protector-guard-offset%> and "
10454 "%<-mstack-protector-guard-reg%> must be used "
10455 "with %<-mstack-protector-guard=sysreg%>");
10458 if (OPTION_SET_P (riscv_stack_protector_guard_reg_str))
10460 const char *str = riscv_stack_protector_guard_reg_str;
10461 int reg = decode_reg_name (str);
10463 if (!IN_RANGE (reg, GP_REG_FIRST + 1, GP_REG_LAST))
10464 error ("%qs is not a valid base register in %qs", str,
10465 "-mstack-protector-guard-reg=");
10467 riscv_stack_protector_guard_reg = reg;
10470 if (OPTION_SET_P (riscv_stack_protector_guard_offset_str))
10472 char *end;
10473 const char *str = riscv_stack_protector_guard_offset_str;
10474 errno = 0;
10475 long offs = strtol (riscv_stack_protector_guard_offset_str, &end, 0);
10477 if (!*str || *end || errno)
10478 error ("%qs is not a valid number in %qs", str,
10479 "-mstack-protector-guard-offset=");
10481 if (!SMALL_OPERAND (offs))
10482 error ("%qs is not a valid offset in %qs", str,
10483 "-mstack-protector-guard-offset=");
10485 riscv_stack_protector_guard_offset = offs;
10488 int guard_size = param_stack_clash_protection_guard_size;
10490 /* Enforce that interval is the same size as guard size so the mid-end does
10491 the right thing. */
10492 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
10493 param_stack_clash_protection_probe_interval,
10494 guard_size);
10496 /* The maybe_set calls won't update the value if the user has explicitly set
10497 one. Which means we need to validate that probing interval and guard size
10498 are equal. */
10499 int probe_interval
10500 = param_stack_clash_protection_probe_interval;
10501 if (guard_size != probe_interval)
10502 error ("stack clash guard size %<%d%> must be equal to probing interval "
10503 "%<%d%>", guard_size, probe_interval);
10505 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
10506 param_sched_pressure_algorithm,
10507 SCHED_PRESSURE_MODEL);
10509 /* Function to allocate machine-dependent function status. */
10510 init_machine_status = &riscv_init_machine_status;
10512 riscv_override_options_internal (&global_options);
10514 /* Save these options as the default ones in case we push and pop them later
10515 while processing functions with potential target attributes. */
10516 target_option_default_node = target_option_current_node
10517 = build_target_option_node (&global_options, &global_options_set);
10520 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
10521 Used by riscv_set_current_function to
10522 make sure optab availability predicates are recomputed when necessary. */
10524 void
10525 riscv_save_restore_target_globals (tree new_tree)
10527 if (TREE_TARGET_GLOBALS (new_tree))
10528 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
10529 else if (new_tree == target_option_default_node)
10530 restore_target_globals (&default_target_globals);
10531 else
10532 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
10535 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
10536 using the information saved in PTR. */
10538 static void
10539 riscv_option_restore (struct gcc_options *opts,
10540 struct gcc_options * /* opts_set */,
10541 struct cl_target_option * /* ptr */)
10543 riscv_override_options_internal (opts);
10546 static GTY (()) tree riscv_previous_fndecl;
10548 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
10550 static void
10551 riscv_conditional_register_usage (void)
10553 /* We have only x0~x15 on RV32E/RV64E. */
10554 if (TARGET_RVE)
10556 for (int r = 16; r <= 31; r++)
10557 fixed_regs[r] = 1;
10560 if (riscv_abi == ABI_ILP32E)
10562 for (int r = 16; r <= 31; r++)
10563 call_used_regs[r] = 1;
10566 if (!TARGET_HARD_FLOAT)
10568 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
10569 fixed_regs[regno] = call_used_regs[regno] = 1;
10572 /* In the soft-float ABI, there are no callee-saved FP registers. */
10573 if (UNITS_PER_FP_ARG == 0)
10575 for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
10576 call_used_regs[regno] = 1;
10579 if (!TARGET_VECTOR)
10581 for (int regno = V_REG_FIRST; regno <= V_REG_LAST; regno++)
10582 fixed_regs[regno] = call_used_regs[regno] = 1;
10584 fixed_regs[VTYPE_REGNUM] = call_used_regs[VTYPE_REGNUM] = 1;
10585 fixed_regs[VL_REGNUM] = call_used_regs[VL_REGNUM] = 1;
10586 fixed_regs[VXRM_REGNUM] = call_used_regs[VXRM_REGNUM] = 1;
10587 fixed_regs[FRM_REGNUM] = call_used_regs[FRM_REGNUM] = 1;
10591 /* Return a register priority for hard reg REGNO. */
10593 static int
10594 riscv_register_priority (int regno)
10596 /* Favor compressed registers to improve the odds of RVC instruction
10597 selection. */
10598 if (riscv_compressed_reg_p (regno))
10599 return 1;
10601 return 0;
10604 /* Implement TARGET_TRAMPOLINE_INIT. */
10606 static void
10607 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
10609 rtx addr, end_addr, mem;
10610 uint32_t trampoline[4];
10611 unsigned int i;
10612 HOST_WIDE_INT static_chain_offset, target_function_offset;
10614 /* Work out the offsets of the pointers from the start of the
10615 trampoline code. */
10616 gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
10618 /* Get pointers to the beginning and end of the code block. */
10619 addr = force_reg (Pmode, XEXP (m_tramp, 0));
10620 end_addr = riscv_force_binary (Pmode, PLUS, addr,
10621 GEN_INT (TRAMPOLINE_CODE_SIZE));
10624 if (Pmode == SImode)
10626 chain_value = force_reg (Pmode, chain_value);
10628 rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10629 /* lui t2, hi(chain)
10630 lui t0, hi(func)
10631 addi t2, t2, lo(chain)
10632 jr t0, lo(func) */
10634 unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
10635 unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
10637 rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
10639 /* 0xfff. */
10640 rtx imm12_mask = gen_reg_rtx (SImode);
10641 emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
10643 rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
10645 /* Gen lui t2, hi(chain). */
10646 rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
10647 fixup_value);
10648 hi_chain = riscv_force_binary (SImode, AND, hi_chain,
10649 uimm_mask);
10650 lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
10651 rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
10652 gen_int_mode (lui_hi_chain_code, SImode));
10654 mem = adjust_address (m_tramp, SImode, 0);
10655 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_chain));
10657 /* Gen lui t0, hi(func). */
10658 rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
10659 fixup_value);
10660 hi_func = riscv_force_binary (SImode, AND, hi_func,
10661 uimm_mask);
10662 lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
10663 rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
10664 gen_int_mode (lui_hi_func_code, SImode));
10666 mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
10667 riscv_emit_move (mem, riscv_swap_instruction (lui_hi_func));
10669 /* Gen addi t2, t2, lo(chain). */
10670 rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
10671 imm12_mask);
10672 lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
10674 lo_chain_code = OPCODE_ADDI
10675 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
10676 | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
10678 rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
10679 force_reg (SImode, GEN_INT (lo_chain_code)));
10681 mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
10682 riscv_emit_move (mem, riscv_swap_instruction (addi_lo_chain));
10684 /* Gen jr t0, lo(func). */
10685 rtx lo_func = riscv_force_binary (SImode, AND, target_function,
10686 imm12_mask);
10687 lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
10689 lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
10691 rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
10692 force_reg (SImode, GEN_INT (lo_func_code)));
10694 mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
10695 riscv_emit_move (mem, riscv_swap_instruction (jr_lo_func));
10697 else
10699 static_chain_offset = TRAMPOLINE_CODE_SIZE;
10700 target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
10702 /* auipc t2, 0
10703 l[wd] t0, target_function_offset(t2)
10704 l[wd] t2, static_chain_offset(t2)
10705 jr t0 */
10707 trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
10708 trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
10709 | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
10710 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
10711 | (target_function_offset << SHIFT_IMM);
10712 trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
10713 | (STATIC_CHAIN_REGNUM << SHIFT_RD)
10714 | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
10715 | (static_chain_offset << SHIFT_IMM);
10716 trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
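/* The layout is thus four 32-bit instructions (TRAMPOLINE_CODE_SIZE
   bytes), then the static chain pointer, then the target address; the
   two loads above fetch both words PC-relatively through t2. */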
10718 /* Copy the trampoline code. */
10719 for (i = 0; i < ARRAY_SIZE (trampoline); i++)
10721 if (BYTES_BIG_ENDIAN)
10722 trampoline[i] = __builtin_bswap32(trampoline[i]);
10723 mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
10724 riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
10727 /* Set up the static chain pointer field. */
10728 mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
10729 riscv_emit_move (mem, chain_value);
10731 /* Set up the target function field. */
10732 mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
10733 riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
10736 /* Flush the code part of the trampoline. */
10737 emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
10738 emit_insn (gen_clear_cache (addr, end_addr));
10741 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */
10743 static bool
10744 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
10745 tree exp ATTRIBUTE_UNUSED)
10747 /* Don't use sibcalls when using the save-restore routines. */
10748 if (TARGET_SAVE_RESTORE)
10749 return false;
10751 /* Don't use sibcall for naked functions. */
10752 if (cfun->machine->naked_p)
10753 return false;
10755 /* Don't use sibcall for interrupt functions. */
10756 if (cfun->machine->interrupt_handler_p)
10757 return false;
10759 /* Don't use sibcalls in the large code model, because sibcall expansion
10760 and epilogue expansion both use the RISCV_PROLOGUE_TEMP
10761 register. */
10762 if (riscv_cmodel == CM_LARGE)
10763 return false;
10765 return true;
10768 /* Return the interrupt type, or UNKNOWN_MODE if DECL is not an
10769 interrupt function. */
10770 static enum riscv_privilege_levels
10771 riscv_get_interrupt_type (tree decl)
10773 gcc_assert (decl != NULL_TREE);
10775 if ((TREE_CODE (decl) != FUNCTION_DECL)
10776 || (!riscv_interrupt_type_p (TREE_TYPE (decl))))
10777 return UNKNOWN_MODE;
10779 tree attr_args
10780 = TREE_VALUE (lookup_attribute ("interrupt",
10781 TYPE_ATTRIBUTES (TREE_TYPE (decl))));
10783 if (attr_args && TREE_CODE (TREE_VALUE (attr_args)) != VOID_TYPE)
10785 const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
10787 if (!strcmp (string, "user"))
10788 return USER_MODE;
10789 else if (!strcmp (string, "supervisor"))
10790 return SUPERVISOR_MODE;
10791 else /* Must be "machine". */
10792 return MACHINE_MODE;
10794 else
10795 /* Interrupt attributes are machine mode by default. */
10796 return MACHINE_MODE;
10799 /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions
10800 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
10801 of the function, if such exists. This function may be called multiple
10802 times on a single function so use riscv_previous_fndecl to avoid
10803 setting up identical state. */
10805 /* Sanity checking for the above function attributes. */
10806 static void
10807 riscv_set_current_function (tree decl)
10809 if (decl == NULL_TREE
10810 || current_function_decl == NULL_TREE
10811 || current_function_decl == error_mark_node
10812 || ! cfun->machine)
10813 return;
10815 if (!cfun->machine->attributes_checked_p)
10817 cfun->machine->naked_p = riscv_naked_function_p (decl);
10818 cfun->machine->interrupt_handler_p
10819 = riscv_interrupt_type_p (TREE_TYPE (decl));
10821 if (cfun->machine->naked_p && cfun->machine->interrupt_handler_p)
10822 error ("function attributes %qs and %qs are mutually exclusive",
10823 "interrupt", "naked");
10825 if (cfun->machine->interrupt_handler_p)
10827 tree ret = TREE_TYPE (TREE_TYPE (decl));
10828 tree args = TYPE_ARG_TYPES (TREE_TYPE (decl));
10830 if (TREE_CODE (ret) != VOID_TYPE)
10831 error ("%qs function cannot return a value", "interrupt");
10833 if (args && TREE_CODE (TREE_VALUE (args)) != VOID_TYPE)
10834 error ("%qs function cannot have arguments", "interrupt");
10836 cfun->machine->interrupt_mode = riscv_get_interrupt_type (decl);
10838 gcc_assert (cfun->machine->interrupt_mode != UNKNOWN_MODE);
10841 /* Don't print the above diagnostics more than once. */
10842 cfun->machine->attributes_checked_p = 1;
10845 if (!decl || decl == riscv_previous_fndecl)
10846 return;
10848 tree old_tree = (riscv_previous_fndecl
10849 ? DECL_FUNCTION_SPECIFIC_TARGET (riscv_previous_fndecl)
10850 : NULL_TREE);
10852 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (decl);
10854 /* If current function has no attributes but the previous one did,
10855 use the default node. */
10856 if (!new_tree && old_tree)
10857 new_tree = target_option_default_node;
10859 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
10860 the default have already been handled by
10861 riscv_save_restore_target_globals. */
10862 if (old_tree == new_tree)
10863 return;
10865 riscv_previous_fndecl = decl;
10867 /* First set the target options. */
10868 cl_target_option_restore (&global_options, &global_options_set,
10869 TREE_TARGET_OPTION (new_tree));
10871 /* The ISA extensions can vary per function via the target attribute.
10872 Thus, make sure that the machine modes are reflected correctly here. */
10873 init_adjust_machine_modes ();
10875 riscv_save_restore_target_globals (new_tree);
10878 /* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
10879 static tree
10880 riscv_merge_decl_attributes (tree olddecl, tree newdecl)
10882 tree combined_attrs;
10884 enum riscv_privilege_levels old_interrupt_type
10885 = riscv_get_interrupt_type (olddecl);
10886 enum riscv_privilege_levels new_interrupt_type
10887 = riscv_get_interrupt_type (newdecl);
10889 /* Check that the old and new decls have the same interrupt type. */
10890 if ((old_interrupt_type != UNKNOWN_MODE)
10891 && (new_interrupt_type != UNKNOWN_MODE)
10892 && (old_interrupt_type != new_interrupt_type))
10893 error ("%qs function cannot have different interrupt type", "interrupt");
10895 /* Create combined attributes. */
10896 combined_attrs = merge_attributes (DECL_ATTRIBUTES (olddecl),
10897 DECL_ATTRIBUTES (newdecl));
10899 return combined_attrs;
10902 /* Implement TARGET_CANNOT_COPY_INSN_P. */
10904 static bool
10905 riscv_cannot_copy_insn_p (rtx_insn *insn)
10907 return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
10910 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. */
10912 static bool
10913 riscv_slow_unaligned_access (machine_mode mode, unsigned int)
10915 return VECTOR_MODE_P (mode) ? !TARGET_VECTOR_MISALIGN_SUPPORTED
10916 : riscv_slow_unaligned_access_p;
10919 static bool
10920 riscv_overlap_op_by_pieces (void)
10922 return tune_param->overlap_op_by_pieces;
10925 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10927 static bool
10928 riscv_can_change_mode_class (machine_mode from, machine_mode to,
10929 reg_class_t rclass)
10931 /* We have RVV VLS modes and VLA modes sharing the same REG_CLASS.
10932 In the 'cprop_hardreg' pass, we will try to do hard reg copy propagation
10933 between a wider mode (FROM) and a narrower mode (TO).
10935 E.g. We should not allow copy propagation
10936 - RVVMF8BI (precision = [16, 16]) -> V32BI (precision = [32, 0])
10937 since we can't order their sizes, which would cause an ICE in regcprop.
10939 TODO: Even though they have different sizes, they always change
10940 the whole register. We may enhance such cases in regcprop to optimize
10941 them in the future. */
10942 if (reg_classes_intersect_p (V_REGS, rclass)
10943 && !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
10944 return false;
10946 /* Subregs of modes larger than one vector are ambiguous.
10947 A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one
10948 register group of two at VLEN = 128 or one register at VLEN >= 256 and
10949 we cannot, statically, determine which part of it to extract.
10950 Therefore prevent that. */
10951 if (reg_classes_intersect_p (V_REGS, rclass)
10952 && riscv_v_ext_vls_mode_p (from)
10953 && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from)))
10954 return false;
10956 return !reg_classes_intersect_p (FP_REGS, rclass);
10959 /* Implement TARGET_CONSTANT_ALIGNMENT. */
10961 static HOST_WIDE_INT
10962 riscv_constant_alignment (const_tree exp, HOST_WIDE_INT align)
10964 if ((TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR)
10965 && (riscv_align_data_type == riscv_align_data_type_xlen))
10966 return MAX (align, BITS_PER_WORD);
10967 return align;
10970 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
10972 /* This function is equivalent to default_promote_function_mode_always_promote
10973 except that it returns a promoted mode even if type is NULL_TREE. This is
10974 needed by libcalls which have no type (only a mode) such as fixed conversion
10975 routines that take a signed or unsigned char/short/int argument and convert
10976 it to a fixed type. */
10978 static machine_mode
10979 riscv_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
10980 machine_mode mode,
10981 int *punsignedp ATTRIBUTE_UNUSED,
10982 const_tree fntype ATTRIBUTE_UNUSED,
10983 int for_return ATTRIBUTE_UNUSED)
10985 int unsignedp;
10987 if (type != NULL_TREE)
10988 return promote_mode (type, mode, punsignedp);
10990 unsignedp = *punsignedp;
10991 scalar_mode smode = as_a <scalar_mode> (mode);
10992 PROMOTE_MODE (smode, unsignedp, type);
10993 *punsignedp = unsignedp;
10994 return smode;
10997 /* Implement TARGET_MACHINE_DEPENDENT_REORG. */
10999 static void
11000 riscv_reorg (void)
11002 /* Do nothing unless we have -msave-restore. */
11003 if (TARGET_SAVE_RESTORE)
11004 riscv_remove_unneeded_save_restore_calls ();
11007 /* Return nonzero if register FROM_REGNO can be renamed to register
11008 TO_REGNO. */
11010 bool
11011 riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED,
11012 unsigned to_regno)
11014 /* Interrupt functions can only use registers that have already been
11015 saved by the prologue, even if they would normally be
11016 call-clobbered. */
11017 return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno);
11020 /* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */
11022 bool
11023 riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
11025 /* Prefer old address if it is less expensive. */
11026 addr_space_t as = MEM_ADDR_SPACE (memref);
11027 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
11028 int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed);
11029 int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed);
11030 return new_cost <= old_cost;
11033 /* Helper function for generating gpr_save pattern. */
11035 rtx
11036 riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
11038 unsigned count = riscv_save_libcall_count (frame->mask);
11039 /* 1 for the unspec, 2 for the clobbers of t0/t1 and 1 for ra. */
11040 unsigned veclen = 1 + 2 + 1 + count;
11041 rtvec vec = rtvec_alloc (veclen);
11043 gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
11045 RTVEC_ELT (vec, 0) =
11046 gen_rtx_UNSPEC_VOLATILE (VOIDmode,
11047 gen_rtvec (1, GEN_INT (count)), UNSPECV_GPR_SAVE);
11049 for (unsigned i = 1; i < veclen; ++i)
11051 unsigned regno = gpr_save_reg_order[i];
11052 rtx reg = gen_rtx_REG (Pmode, regno);
11053 rtx elt;
11055 /* t0 and t1 are CLOBBERs, others are USEs. */
11056 if (i < 3)
11057 elt = gen_rtx_CLOBBER (Pmode, reg);
11058 else
11059 elt = gen_rtx_USE (Pmode, reg);
11061 RTVEC_ELT (vec, i) = elt;
11064 /* The largest caller-saved register number must be set in the mask
11065 if we are not using __riscv_save_0. */
11066 gcc_assert ((count == 0) ||
11067 BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
11069 return gen_rtx_PARALLEL (VOIDmode, vec);
11072 static HOST_WIDE_INT
11073 zcmp_base_adj (int regs_num)
11075 return riscv_16bytes_align ((regs_num) *GET_MODE_SIZE (word_mode));
11078 static HOST_WIDE_INT
11079 zcmp_additional_adj (HOST_WIDE_INT total, int regs_num)
11081 return total - zcmp_base_adj (regs_num);
11084 bool
11085 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT total, int regs_num)
11087 HOST_WIDE_INT additional_bytes = zcmp_additional_adj (total, regs_num);
11088 return additional_bytes == 0 || additional_bytes == 1 * ZCMP_SP_INC_STEP
11089 || additional_bytes == 2 * ZCMP_SP_INC_STEP
11090 || additional_bytes == ZCMP_MAX_SPIMM * ZCMP_SP_INC_STEP;
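/* For illustration (assuming ZCMP_SP_INC_STEP is 16 and ZCMP_MAX_SPIMM is 3,
   i.e. the cm.push/cm.pop stack-adjustment granularity, and an RV64 word
   size of 8 bytes), the extra adjustment beyond the base register-save
   area may only be 0, 16, 32 or 48 bytes:

     zcmp_base_adj (2)                            // 2 * 8 rounded up -> 16
     riscv_zcmp_valid_stack_adj_bytes_p (32, 2)   // 32 - 16 == 16 -> true
     riscv_zcmp_valid_stack_adj_bytes_p (40, 2)   // 40 - 16 == 24 -> false
*/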
11093 /* Return true if OP is a valid gpr_save pattern. */
11095 bool
11096 riscv_gpr_save_operation_p (rtx op)
11098 unsigned len = XVECLEN (op, 0);
11100 if (len > ARRAY_SIZE (gpr_save_reg_order))
11101 return false;
11103 for (unsigned i = 0; i < len; i++)
11105 rtx elt = XVECEXP (op, 0, i);
11106 if (i == 0)
11108 /* First element in parallel is unspec. */
11109 if (GET_CODE (elt) != UNSPEC_VOLATILE
11110 || GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
11111 || XINT (elt, 1) != UNSPECV_GPR_SAVE)
11112 return false;
11114 else
11116 /* Two CLOBBERs followed by USEs; we must check the order. */
11117 unsigned expect_code = i < 3 ? CLOBBER : USE;
11118 if (GET_CODE (elt) != expect_code
11119 || !REG_P (XEXP (elt, 0))
11120 || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
11121 return false;
11125 return true;
11128 /* Implement TARGET_ASAN_SHADOW_OFFSET. */
11130 static unsigned HOST_WIDE_INT
11131 riscv_asan_shadow_offset (void)
11133 /* We only have libsanitizer support for RV64 at present.
11135 This number must match ASAN_SHADOW_OFFSET_CONST in the file
11136 libsanitizer/asan/asan_mapping.h. */
11137 return TARGET_64BIT ? HOST_WIDE_INT_UC (0xd55550000) : 0;
11140 /* Implement TARGET_MANGLE_TYPE. */
11142 static const char *
11143 riscv_mangle_type (const_tree type)
11145 /* Half-precision float, _Float16 is "DF16_" and __bf16 is "DF16b". */
11146 if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
11148 if (TYPE_MODE (type) == HFmode)
11149 return "DF16_";
11151 if (TYPE_MODE (type) == BFmode)
11152 return "DF16b";
11154 gcc_unreachable ();
11157 /* Mangle all vector types for the vector extension. The mangled
11158 name follows the RVV LLVM convention, that is,
11159 "u" + length of (abi_name) + abi_name. */
11160 if (TYPE_NAME (type) != NULL)
11162 const char *res = riscv_vector::mangle_builtin_type (type);
11163 if (res)
11164 return res;
11167 /* Use the default mangling. */
11168 return NULL;
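/* For example (illustrative, following the Itanium C++ ABI rules applied
   here): `void f (_Float16)` mangles as `_Z1fDF16_`, and an RVV builtin
   type whose ABI name is `__rvv_int8m1_t` (14 characters) would mangle
   as `u14__rvv_int8m1_t`.  */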
11171 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
11173 static bool
11174 riscv_scalar_mode_supported_p (scalar_mode mode)
11176 if (mode == HFmode || mode == BFmode)
11177 return true;
11178 else
11179 return default_scalar_mode_supported_p (mode);
11182 /* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
11183 if MODE is HFmode or BFmode, and punt to the generic implementation
11184 otherwise. */
11186 static bool
11187 riscv_libgcc_floating_mode_supported_p (scalar_float_mode mode)
11189 if (mode == HFmode || mode == BFmode)
11190 return true;
11191 else
11192 return default_libgcc_floating_mode_supported_p (mode);
11195 /* Set the value of FLT_EVAL_METHOD.
11196 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
11198 0: evaluate all operations and constants, whose semantic type has at
11199 most the range and precision of type float, to the range and
11200 precision of float; evaluate all other operations and constants to
11201 the range and precision of the semantic type;
11203 N, where _FloatN is a supported interchange floating type
11204 evaluate all operations and constants, whose semantic type has at
11205 most the range and precision of _FloatN type, to the range and
11206 precision of the _FloatN type; evaluate all other operations and
11207 constants to the range and precision of the semantic type;
11209 If we have the zfh/zhinx/zvfh extensions then we support _Float16
11210 in native precision, so we should set this to 16. */
11211 static enum flt_eval_method
11212 riscv_excess_precision (enum excess_precision_type type)
11214 switch (type)
11216 case EXCESS_PRECISION_TYPE_FAST:
11217 case EXCESS_PRECISION_TYPE_STANDARD:
11218 return ((TARGET_ZFH || TARGET_ZHINX || TARGET_ZVFH)
11219 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
11220 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
11221 case EXCESS_PRECISION_TYPE_IMPLICIT:
11222 case EXCESS_PRECISION_TYPE_FLOAT16:
11223 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
11224 default:
11225 gcc_unreachable ();
11227 return FLT_EVAL_METHOD_UNPREDICTABLE;
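/* For illustration (hypothetical user code, not part of the compiler):

     _Float16 a, b, c;
     _Float16 d = a + b * c;

   With zfh/zhinx/zvfh (FLT_EVAL_METHOD == 16) the expression is evaluated
   directly in _Float16.  Without them (FLT_EVAL_METHOD == 32) the operands
   are promoted to float, the arithmetic is done in float, and the result
   is truncated back to _Float16 on assignment.  */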
11230 /* Implement TARGET_FLOATN_MODE. */
11231 static opt_scalar_float_mode
11232 riscv_floatn_mode (int n, bool extended)
11234 if (!extended && n == 16)
11235 return HFmode;
11237 return default_floatn_mode (n, extended);
11240 /* Record that we have no arithmetic or comparison libfuncs for
11241 machine_mode MODE. */
11242 static void
11243 riscv_block_arith_comp_libfuncs_for_mode (machine_mode mode)
11245 /* Half-precision float or Brain float operations. The compiler handles all
11246 operations with NULL libfuncs by converting to SFmode. */
11248 /* Arithmetic. */
11249 set_optab_libfunc (add_optab, mode, NULL);
11250 set_optab_libfunc (sdiv_optab, mode, NULL);
11251 set_optab_libfunc (smul_optab, mode, NULL);
11252 set_optab_libfunc (neg_optab, mode, NULL);
11253 set_optab_libfunc (sub_optab, mode, NULL);
11255 /* Comparisons. */
11256 set_optab_libfunc (eq_optab, mode, NULL);
11257 set_optab_libfunc (ne_optab, mode, NULL);
11258 set_optab_libfunc (lt_optab, mode, NULL);
11259 set_optab_libfunc (le_optab, mode, NULL);
11260 set_optab_libfunc (ge_optab, mode, NULL);
11261 set_optab_libfunc (gt_optab, mode, NULL);
11262 set_optab_libfunc (unord_optab, mode, NULL);
11265 static void
11266 riscv_init_libfuncs (void)
11268 riscv_block_arith_comp_libfuncs_for_mode (HFmode);
11269 riscv_block_arith_comp_libfuncs_for_mode (BFmode);
11271 /* Convert between BFmode and HFmode using only trunc libfunc if needed. */
11272 set_conv_libfunc (sext_optab, BFmode, HFmode, "__trunchfbf2");
11273 set_conv_libfunc (sext_optab, HFmode, BFmode, "__truncbfhf2");
11274 set_conv_libfunc (trunc_optab, BFmode, HFmode, "__trunchfbf2");
11275 set_conv_libfunc (trunc_optab, HFmode, BFmode, "__truncbfhf2");
11278 #if CHECKING_P
11279 void
11280 riscv_reinit (void)
11282 riscv_option_override ();
11283 init_adjust_machine_modes ();
11284 init_derived_machine_modes ();
11285 reinit_regs ();
11286 init_optabs ();
11288 #endif
11290 #if CHECKING_P
11291 #undef TARGET_RUN_TARGET_SELFTESTS
11292 #define TARGET_RUN_TARGET_SELFTESTS selftest::riscv_run_selftests
11293 #endif /* #if CHECKING_P */
11295 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
11297 static bool
11298 riscv_vector_mode_supported_p (machine_mode mode)
11300 if (TARGET_VECTOR)
11301 return riscv_v_ext_mode_p (mode);
11303 return false;
11306 /* Implement TARGET_VERIFY_TYPE_CONTEXT. */
11308 static bool
11309 riscv_verify_type_context (location_t loc, type_context_kind context,
11310 const_tree type, bool silent_p)
11312 return riscv_vector::verify_type_context (loc, context, type, silent_p);
11315 /* Implement TARGET_VECTOR_ALIGNMENT. */
11317 static HOST_WIDE_INT
11318 riscv_vector_alignment (const_tree type)
11320 /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can
11321 be set for non-predicate vectors of booleans. Modes are the most
11322 direct way we have of identifying real RVV predicate types. */
11323 /* FIXME: The RVV spec does not specify the alignment of bool, so we
11324 use one-byte alignment. */
11325 if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL)
11326 return 8;
11328 widest_int min_size
11329 = constant_lower_bound (wi::to_poly_widest (TYPE_SIZE (type)));
11330 return wi::umin (min_size, 128).to_uhwi ();
11333 /* Implement REGMODE_NATURAL_SIZE. */
11335 poly_uint64
11336 riscv_regmode_natural_size (machine_mode mode)
11338 /* The natural size for RVV data modes is one RVV data vector,
11339 and similarly for predicates. We can't independently modify
11340 anything smaller than that. */
11341 /* ??? For now, only do this for variable-width RVV registers.
11342 Doing it for constant-sized registers breaks lower-subreg.c. */
11344 if (riscv_v_ext_mode_p (mode))
11346 poly_uint64 size = GET_MODE_SIZE (mode);
11347 if (riscv_v_ext_tuple_mode_p (mode))
11349 size = GET_MODE_SIZE (riscv_vector::get_subpart_mode (mode));
11350 if (known_lt (size, BYTES_PER_RISCV_VECTOR))
11351 return size;
11353 else if (riscv_v_ext_vector_mode_p (mode))
11355 /* RVV mask modes always consume a single register. */
11356 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
11357 return BYTES_PER_RISCV_VECTOR;
11359 if (!size.is_constant ())
11360 return BYTES_PER_RISCV_VECTOR;
11361 else if (!riscv_v_ext_vls_mode_p (mode))
11362 /* For -march=rv64gc_zve32f, the natural vector register size
11363 is 32 bits, which is smaller than the scalar register size, so we
11364 return the minimum of the vector register size and the scalar
11365 register size. */
11366 return MIN (size.to_constant (), UNITS_PER_WORD);
11368 return UNITS_PER_WORD;
11371 /* Implement the TARGET_DWARF_POLY_INDETERMINATE_VALUE hook. */
11373 static unsigned int
11374 riscv_dwarf_poly_indeterminate_value (unsigned int i, unsigned int *factor,
11375 int *offset)
11377 /* Polynomial invariant 1 == (VLENB / BYTES_PER_RISCV_VECTOR) - 1.
11378 1. TARGET_MIN_VLEN == 32, polynomial invariant 1 == (VLENB / 4) - 1.
11379 2. TARGET_MIN_VLEN > 32, polynomial invariant 1 == (VLENB / 8) - 1.
11381 gcc_assert (i == 1);
11382 *factor = BYTES_PER_RISCV_VECTOR.coeffs[1];
11383 *offset = 1;
11384 return RISCV_DWARF_VLENB;
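/* Worked example (illustrative): with TARGET_MIN_VLEN > 32 we have
   BYTES_PER_RISCV_VECTOR == 8 + 8x, so *factor == 8.  At run time with
   VLEN == 256 (VLENB == 32), a DWARF consumer evaluates the indeterminate
   as VLENB / factor - offset == 32 / 8 - 1 == 3, and hence the vector
   byte size as 8 + 8 * 3 == 32.  */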
11387 /* Implement TARGET_ESTIMATED_POLY_VALUE. */
11389 static HOST_WIDE_INT
11390 riscv_estimated_poly_value (poly_int64 val,
11391 poly_value_estimate_kind kind = POLY_VALUE_LIKELY)
11393 if (TARGET_VECTOR)
11394 return riscv_vector::estimated_poly_value (val, kind);
11395 return default_estimated_poly_value (val, kind);
11398 /* Return true if the vector misalignment factor is supported by the
11399 target. */
11400 bool
11401 riscv_support_vector_misalignment (machine_mode mode,
11402 const_tree type ATTRIBUTE_UNUSED,
11403 int misalignment,
11404 bool is_packed ATTRIBUTE_UNUSED)
11406 /* Depend on movmisalign pattern. */
11407 return default_builtin_support_vector_misalignment (mode, type, misalignment,
11408 is_packed);
11411 /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
11413 static opt_machine_mode
11414 riscv_get_mask_mode (machine_mode mode)
11416 if (TARGET_VECTOR && riscv_v_ext_mode_p (mode))
11417 return riscv_vector::get_mask_mode (mode);
11419 return default_get_mask_mode (mode);
11422 /* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE. Assume for now that
11423 it isn't worth branching around empty masked ops (including masked
11424 stores). */
11426 static bool
11427 riscv_empty_mask_is_expensive (unsigned)
11429 return false;
11432 /* Return true if a shift-amount matches the trailing cleared bits on
11433 a bitmask. */
11435 bool
11436 riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
11438 return shamt == ctz_hwi (mask);
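/* For example (illustrative): riscv_shamt_matches_mask_p (8, 0xffffff00)
   is true, since the mask has exactly 8 trailing zero bits, while
   riscv_shamt_matches_mask_p (4, 0xffffff00) is false.  */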
11441 static HARD_REG_SET
11442 vector_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
11444 HARD_REG_SET zeroed_hardregs;
11445 CLEAR_HARD_REG_SET (zeroed_hardregs);
11447 /* Find a register to hold vl. */
11448 unsigned vl_regno = INVALID_REGNUM;
11449 /* Skip the first GPR (x0), otherwise the existing vl would be kept,
11450 since a vsetvl whose rd and rs1 are both x0 leaves vl unchanged. */
11451 for (unsigned regno = GP_REG_FIRST + 1; regno <= GP_REG_LAST; regno++)
11453 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
11455 vl_regno = regno;
11456 break;
11460 if (vl_regno > GP_REG_LAST)
11461 sorry ("cannot allocate vl register for %qs on this target",
11462 "-fzero-call-used-regs");
11464 /* Vector configurations need not be saved and restored here. The
11465 -fzero-call-used-regs=* option will zero all vector registers and
11466 return, so there are no vector operations between them. */
11468 bool emitted_vlmax_vsetvl = false;
11469 rtx vl = gen_rtx_REG (Pmode, vl_regno); /* vl is VLMAX. */
11470 for (unsigned regno = V_REG_FIRST; regno <= V_REG_LAST; ++regno)
11472 if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
11474 rtx target = regno_reg_rtx[regno];
11475 machine_mode mode = GET_MODE (target);
11477 if (!emitted_vlmax_vsetvl)
11479 riscv_vector::emit_hard_vlmax_vsetvl (mode, vl);
11480 emitted_vlmax_vsetvl = true;
11483 rtx ops[] = {target, CONST0_RTX (mode)};
11484 riscv_vector::emit_vlmax_insn_lra (code_for_pred_mov (mode),
11485 riscv_vector::UNARY_OP, ops, vl);
11487 SET_HARD_REG_BIT (zeroed_hardregs, regno);
11491 return zeroed_hardregs;
11494 /* Generate a sequence of instructions that zero registers specified by
11495 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
11496 zeroed. */
11497 HARD_REG_SET
11498 riscv_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
11500 HARD_REG_SET zeroed_hardregs;
11501 CLEAR_HARD_REG_SET (zeroed_hardregs);
11503 if (TARGET_VECTOR)
11504 zeroed_hardregs |= vector_zero_call_used_regs (need_zeroed_hardregs);
11506 return zeroed_hardregs | default_zero_call_used_regs (need_zeroed_hardregs
11507 & ~zeroed_hardregs);
11510 /* Implement target hook TARGET_ARRAY_MODE. */
11512 static opt_machine_mode
11513 riscv_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
11515 machine_mode vmode;
11516 if (TARGET_VECTOR
11517 && riscv_vector::get_tuple_mode (mode, nelems).exists (&vmode))
11518 return vmode;
11520 return opt_machine_mode ();
11523 /* Given memory reference MEM, expand code to compute the aligned
11524 memory address, shift and mask values and store them into
11525 *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. */
11527 void
11528 riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask,
11529 rtx *not_mask)
11531 /* Align the memory address to a word. */
11532 rtx addr = force_reg (Pmode, XEXP (mem, 0));
11534 rtx addr_mask = gen_int_mode (-4, Pmode);
11536 rtx aligned_addr = gen_reg_rtx (Pmode);
11537 emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask));
11539 *aligned_mem = change_address (mem, SImode, aligned_addr);
11541 /* Calculate the shift amount. */
11542 emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
11543 gen_int_mode (3, SImode)));
11544 emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift,
11545 gen_int_mode (3, SImode)));
11547 /* Calculate the mask. */
11548 int unshifted_mask = GET_MODE_MASK (GET_MODE (mem));
11550 emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode));
11552 emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask,
11553 gen_lowpart (QImode, *shift)));
11555 emit_move_insn (*not_mask, gen_rtx_NOT (SImode, *mask));
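/* As an illustrative C sketch of the computation above (hypothetical
   names, assuming an HImode access and <stdint.h> types):

     uintptr_t addr    = (uintptr_t) p;     // e.g. 0x1006
     uintptr_t aligned = addr & -4;         // 0x1004: word-aligned address
     unsigned shift    = (addr & 3) * 8;    // 16: bit offset within the word
     uint32_t mask     = 0xffffu << shift;  // GET_MODE_MASK (HImode), shifted
     uint32_t not_mask = ~mask;
*/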
11558 /* Leftshift a subword within an SImode register. */
11560 void
11561 riscv_lshift_subword (machine_mode mode, rtx value, rtx shift,
11562 rtx *shifted_value)
11564 rtx value_reg = gen_reg_rtx (SImode);
11565 emit_move_insn (value_reg, simplify_gen_subreg (SImode, value,
11566 mode, 0));
11568 emit_move_insn (*shifted_value, gen_rtx_ASHIFT (SImode, value_reg,
11569 gen_lowpart (QImode, shift)));
11572 /* Return TRUE if we should use the divmod expander, FALSE otherwise. This
11573 allows the behavior to be tuned for specific implementations as well as
11574 when optimizing for size. */
11576 bool
11577 riscv_use_divmod_expander (void)
11579 return tune_param->use_divmod_expansion;
11582 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. */
11584 static machine_mode
11585 riscv_preferred_simd_mode (scalar_mode mode)
11587 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
11588 return riscv_vector::preferred_simd_mode (mode);
11590 return word_mode;
11593 /* Implement target hook TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. */
11595 static poly_uint64
11596 riscv_vectorize_preferred_vector_alignment (const_tree type)
11598 if (riscv_v_ext_mode_p (TYPE_MODE (type)))
11599 return TYPE_ALIGN (TREE_TYPE (type));
11600 return TYPE_ALIGN (type);
11603 /* Return true if it is static FRM rounding mode. */
11605 static bool
11606 riscv_static_frm_mode_p (int mode)
11608 switch (mode)
11610 case riscv_vector::FRM_RDN:
11611 case riscv_vector::FRM_RUP:
11612 case riscv_vector::FRM_RTZ:
11613 case riscv_vector::FRM_RMM:
11614 case riscv_vector::FRM_RNE:
11615 return true;
11616 default:
11617 return false;
11620 gcc_unreachable ();
11623 /* Implement the floating-point Mode Switching. */
11625 static void
11626 riscv_emit_frm_mode_set (int mode, int prev_mode)
11628 rtx backup_reg = DYNAMIC_FRM_RTL (cfun);
11630 if (prev_mode == riscv_vector::FRM_DYN_CALL)
11631 emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
11633 if (mode != prev_mode)
11635 rtx frm = gen_int_mode (mode, SImode);
11637 if (mode == riscv_vector::FRM_DYN_CALL
11638 && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
11639 /* No need to emit when prev mode is DYN already. */
11640 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
11641 else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
11642 && prev_mode != riscv_vector::FRM_DYN
11643 && prev_mode != riscv_vector::FRM_DYN_CALL)
11644 /* No need to emit when prev mode is DYN or DYN_CALL already. */
11645 emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
11646 else if (mode == riscv_vector::FRM_DYN
11647 && prev_mode != riscv_vector::FRM_DYN_CALL)
11648 /* Restore frm value from backup when switch to DYN mode. */
11649 emit_insn (gen_fsrmsi_restore (backup_reg));
11650 else if (riscv_static_frm_mode_p (mode))
11651 /* Set frm value when switch to static mode. */
11652 emit_insn (gen_fsrmsi_restore (frm));
11656 /* Implement Mode switching. */
11658 static void
11659 riscv_emit_mode_set (int entity, int mode, int prev_mode,
11660 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
11662 switch (entity)
11664 case RISCV_VXRM:
11665 if (mode != VXRM_MODE_NONE && mode != prev_mode)
11666 emit_insn (gen_vxrmsi (gen_int_mode (mode, SImode)));
11667 break;
11668 case RISCV_FRM:
11669 riscv_emit_frm_mode_set (mode, prev_mode);
11670 break;
11671 default:
11672 gcc_unreachable ();
11676 /* If CUR_INSN immediately follows a call and would otherwise need
11677 FRM_NONE, adjust the needed mode to FRM_DYN for the underlying emit. */
11679 static int
11680 riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
11682 rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
11684 if (insn && CALL_P (insn))
11685 return riscv_vector::FRM_DYN;
11687 return mode;
11690 /* Insert the backup frm insn to the end of the bb if and only if the call
11691 is the last insn of this bb. */
11693 static void
11694 riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
11696 edge eg;
11697 bool abnormal_edge_p = false;
11698 edge_iterator eg_iterator;
11699 basic_block bb = BLOCK_FOR_INSN (cur_insn);
11701 FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
11703 if (eg->flags & EDGE_ABNORMAL)
11704 abnormal_edge_p = true;
11705 else
11707 start_sequence ();
11708 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11709 rtx_insn *backup_insn = get_insns ();
11710 end_sequence ();
11712 insert_insn_on_edge (backup_insn, eg);
11716 if (abnormal_edge_p)
11718 start_sequence ();
11719 emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11720 rtx_insn *backup_insn = get_insns ();
11721 end_sequence ();
11723 insert_insn_end_basic_block (backup_insn, bb);
11726 commit_edge_insertions ();
11729 /* Return mode that frm must be switched into
11730 prior to the execution of insn. */
11732 static int
11733 riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
11735 if (!DYNAMIC_FRM_RTL(cfun))
11737 /* The dynamic frm will be initialized only once per function. */
11738 DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
11739 emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
11742 if (CALL_P (cur_insn))
11744 rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
11746 if (!insn)
11747 riscv_frm_emit_after_bb_end (cur_insn);
11749 return riscv_vector::FRM_DYN_CALL;
11752 int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
11754 if (mode == riscv_vector::FRM_NONE)
11755 /* After a call, we need to back up the frm because it may have
11756 been updated during the call. Here, for each insn, we check
11757 whether the previous insn is a call or not. When the previous
11758 insn is a call, there are 2 cases for the emitted mode set.
11760 1. The current insn is not MODE_NONE, so the mode switch
11761 framework will do the switch from MODE_CALL to MODE_NONE natively.
11762 2. The current insn is MODE_NONE, so we need to adjust MODE_NONE
11763 to MODE_DYN, and leave the emit of the mode set to the mode
11764 switching itself.
11766 mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
11768 return mode;
11771 /* Return mode that entity must be switched into
11772 prior to the execution of insn. */
11774 static int
11775 riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
11777 int code = recog_memoized (insn);
11779 switch (entity)
11781 case RISCV_VXRM:
11782 return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
11783 case RISCV_FRM:
11784 return riscv_frm_mode_needed (insn, code);
11785 default:
11786 gcc_unreachable ();
11790 /* Return TRUE if INSN is an asm insn. */
11792 static bool
11793 asm_insn_p (rtx_insn *insn)
11795 extract_insn (insn);
11797 return recog_data.is_asm;
11800 /* Return TRUE if INSN's effect on VXRM is unknown. */
11802 static bool
11803 vxrm_unknown_p (rtx_insn *insn)
11805 /* Return true if there is a definition of VXRM. */
11806 if (reg_set_p (gen_rtx_REG (SImode, VXRM_REGNUM), insn))
11807 return true;
11809 /* A CALL function may contain an instruction that modifies the VXRM,
11810 return true in this situation. */
11811 if (CALL_P (insn))
11812 return true;
11814 /* Return true for all inline assembly, since users may hardcode
11815 assembly like this: asm volatile ("csrwi vxrm, 0"). */
11816 if (asm_insn_p (insn))
11817 return true;
11819 return false;
11822 /* Return TRUE if INSN sets FRM to an unknown dynamic value. */
11824 static bool
11825 frm_unknown_dynamic_p (rtx_insn *insn)
11827 /* Return true if there is a definition of FRM. */
11828 if (reg_set_p (gen_rtx_REG (SImode, FRM_REGNUM), insn))
11829 return true;
11831 return false;
11834 /* Return the mode that an insn results in for VXRM. */
11836 static int
11837 riscv_vxrm_mode_after (rtx_insn *insn, int mode)
11839 if (vxrm_unknown_p (insn))
11840 return VXRM_MODE_NONE;
11842 if (recog_memoized (insn) < 0)
11843 return mode;
11845 if (reg_mentioned_p (gen_rtx_REG (SImode, VXRM_REGNUM), PATTERN (insn)))
11846 return get_attr_vxrm_mode (insn);
11847 else
11848 return mode;
11851 /* Return the mode that an insn results in for FRM. */
11853 static int
11854 riscv_frm_mode_after (rtx_insn *insn, int mode)
11856 STATIC_FRM_P (cfun) = STATIC_FRM_P (cfun) || riscv_static_frm_mode_p (mode);
11858 if (CALL_P (insn))
11859 return mode;
11861 if (frm_unknown_dynamic_p (insn))
11862 return riscv_vector::FRM_DYN;
11864 if (recog_memoized (insn) < 0)
11865 return mode;
11867 if (reg_mentioned_p (gen_rtx_REG (SImode, FRM_REGNUM), PATTERN (insn)))
11868 return get_attr_frm_mode (insn);
11869 else
11870 return mode;
11873 /* Return the mode that an insn results in. */
11875 static int
11876 riscv_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
11878 switch (entity)
11880 case RISCV_VXRM:
11881 return riscv_vxrm_mode_after (insn, mode);
11882 case RISCV_FRM:
11883 return riscv_frm_mode_after (insn, mode);
11884 default:
11885 gcc_unreachable ();
11889 /* Return a mode that ENTITY is assumed to be
11890 switched to at function entry. */
11892 static int
11893 riscv_mode_entry (int entity)
11895 switch (entity)
11897 case RISCV_VXRM:
11898 return VXRM_MODE_NONE;
11899 case RISCV_FRM:
11901 /* According to RVV 1.0 spec, all vector floating-point operations use
11902 the dynamic rounding mode in the frm register. Likewise in other
11903 similar places. */
11904 return riscv_vector::FRM_DYN;
11906 default:
11907 gcc_unreachable ();
11911 /* Return a mode that ENTITY is assumed to be
11912 switched to at function exit. */
11914 static int
11915 riscv_mode_exit (int entity)
11917 switch (entity)
11919 case RISCV_VXRM:
11920 return VXRM_MODE_NONE;
11921 case RISCV_FRM:
11922 return riscv_vector::FRM_DYN_EXIT;
11923 default:
11924 gcc_unreachable ();
11928 static int
11929 riscv_mode_priority (int, int n)
11931 return n;
11934 /* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */
11935 unsigned int
11936 riscv_autovectorize_vector_modes (vector_modes *modes, bool all)
11938 if (TARGET_VECTOR && !TARGET_XTHEADVECTOR)
11939 return riscv_vector::autovectorize_vector_modes (modes, all);
11941 return default_autovectorize_vector_modes (modes, all);
11944 /* Implement TARGET_VECTORIZE_RELATED_MODE. */
11945 opt_machine_mode
11946 riscv_vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode,
11947 poly_uint64 nunits)
11949 if (TARGET_VECTOR)
11950 return riscv_vector::vectorize_related_mode (vector_mode, element_mode,
11951 nunits);
11952 return default_vectorize_related_mode (vector_mode, element_mode, nunits);
11955 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
11957 static bool
11958 riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
11959 rtx target, rtx op0, rtx op1,
11960 const vec_perm_indices &sel)
11962 if (TARGET_VECTOR && riscv_v_ext_mode_p (vmode))
11963 return riscv_vector::expand_vec_perm_const (vmode, op_mode, target, op0,
11964 op1, sel);
11966 return false;
11969 static bool
11970 riscv_frame_pointer_required (void)
11972 return riscv_save_frame_pointer && !crtl->is_leaf;
11975 /* Return the appropriate common costs according to VECTYPE from COSTS. */
11976 static const common_vector_cost *
11977 get_common_costs (const cpu_vector_cost *costs, tree vectype)
11979 gcc_assert (costs);
11981 if (vectype && riscv_v_ext_vls_mode_p (TYPE_MODE (vectype)))
11982 return costs->vls;
11983 return costs->vla;
11986 /* Return the CPU vector costs according to -mtune if tune info has non-NULL
11987 vector cost. Otherwise, return the default generic vector costs. */
11988 const cpu_vector_cost *
11989 get_vector_costs ()
11991 const cpu_vector_cost *costs = tune_param->vec_costs;
11992 if (!costs)
11993 return &generic_vector_cost;
11994 return costs;
11997 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11999 static int
12000 riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
12001 tree vectype, int misalign ATTRIBUTE_UNUSED)
12003 const cpu_vector_cost *costs = get_vector_costs ();
12004 bool fp = false;
12006 if (vectype != NULL)
12007 fp = FLOAT_TYPE_P (vectype);
12009 const common_vector_cost *common_costs = get_common_costs (costs, vectype);
12010 gcc_assert (common_costs != NULL);
12011 switch (type_of_cost)
12013 case scalar_stmt:
12014 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
12016 case scalar_load:
12017 return costs->scalar_load_cost;
12019 case scalar_store:
12020 return costs->scalar_store_cost;
12022 case vector_stmt:
12023 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
12025 case vector_load:
12026 return common_costs->align_load_cost;
12028 case vector_store:
12029 return common_costs->align_store_cost;
12031 case vec_to_scalar:
12032 return common_costs->vec_to_scalar_cost;
12034 case scalar_to_vec:
12035 return common_costs->scalar_to_vec_cost;
12037 case unaligned_load:
12038 return common_costs->unalign_load_cost;
12039 case vector_gather_load:
12040 return common_costs->gather_load_cost;
12042 case unaligned_store:
12043 return common_costs->unalign_store_cost;
12044 case vector_scatter_store:
12045 return common_costs->scatter_store_cost;
12047 case cond_branch_taken:
12048 return costs->cond_taken_branch_cost;
12050 case cond_branch_not_taken:
12051 return costs->cond_not_taken_branch_cost;
12053 case vec_perm:
12054 return common_costs->permute_cost;
12056 case vec_promote_demote:
12057 return fp ? common_costs->fp_stmt_cost : common_costs->int_stmt_cost;
12059 case vec_construct:
12060 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
12062 default:
12063 gcc_unreachable ();
12066 return default_builtin_vectorization_cost (type_of_cost, vectype, misalign);
12069 /* Implement targetm.vectorize.create_costs. */
12071 static vector_costs *
12072 riscv_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
12074 if (TARGET_VECTOR)
12075 return new riscv_vector::costs (vinfo, costing_for_scalar);
12076 /* Default vector costs. */
12077 return new vector_costs (vinfo, costing_for_scalar);
12080 /* Implement TARGET_PREFERRED_ELSE_VALUE. */
12082 static tree
12083 riscv_preferred_else_value (unsigned ifn, tree vectype, unsigned int nops,
12084 tree *ops)
12086 if (riscv_v_ext_mode_p (TYPE_MODE (vectype)))
12088 tree tmp_var = create_tmp_var (vectype);
12089 TREE_NO_WARNING (tmp_var) = 1;
12090 return get_or_create_ssa_default_def (cfun, tmp_var);
12093 return default_preferred_else_value (ifn, vectype, nops, ops);
12096 /* If MEM is in the form of "base+offset", extract the two parts
12097 of address and set to BASE and OFFSET, otherwise return false
12098 after clearing BASE and OFFSET. */
12100 bool
12101 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
12103 rtx addr;
12105 gcc_assert (MEM_P (mem));
12107 addr = XEXP (mem, 0);
12109 if (REG_P (addr))
12111 *base = addr;
12112 *offset = const0_rtx;
12113 return true;
12116 if (GET_CODE (addr) == PLUS
12117 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
12119 *base = XEXP (addr, 0);
12120 *offset = XEXP (addr, 1);
12121 return true;
12124 *base = NULL_RTX;
12125 *offset = NULL_RTX;
12127 return false;
12130 /* Implements target hook vector_mode_supported_any_target_p. */
12132 static bool
12133 riscv_vector_mode_supported_any_target_p (machine_mode)
12135 if (TARGET_XTHEADVECTOR)
12136 return false;
12137 return true;
12140 /* Implements hook TARGET_FUNCTION_VALUE_REGNO_P. */
12142 static bool
12143 riscv_function_value_regno_p (const unsigned regno)
12145 if (GP_RETURN_FIRST <= regno && regno <= GP_RETURN_LAST)
12146 return true;
12148 if (FP_RETURN_FIRST <= regno && regno <= FP_RETURN_LAST)
12149 return true;
12151 if (TARGET_VECTOR && regno == V_RETURN)
12152 return true;
12154 return false;
12157 /* Implements hook TARGET_GET_RAW_RESULT_MODE. */
12159 static fixed_size_mode
12160 riscv_get_raw_result_mode (int regno)
12162 if (!is_a <fixed_size_mode> (reg_raw_mode[regno]))
12163 return as_a <fixed_size_mode> (VOIDmode);
12165 return default_get_reg_raw_mode (regno);
12168 /* Generate a REG rtx of Xmode from the given rtx and mode.
12169 The rtx x can be REG (QI/HI/SI/DI) or const_int.
12170 The machine_mode mode is the original mode from define pattern.
12172 If rtx is REG and Xmode, the RTX x will be returned directly.
12174 If rtx is REG and non-Xmode, the zero extended to new REG of Xmode will be
12175 returned.
12177 If rtx is const_int, a new REG rtx will be created to hold the value of
12178 const_int and then returned.
12180 According to the gccint doc, the constants generated for modes with fewer
12181 bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
12182 will be two cases here, take QImode as example.
12184 For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
12185 mov from const_int to the new REG rtx is good enough here.
12187 For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
12188 Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
12189 of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved
12190 from the (const_int -2).
12192 Then the underlying expanding can perform the code generation based on
12193 the REG rtx of Xmode, instead of taking care of these in expand func. */
12195 static rtx
12196 riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
12198 rtx xmode_reg = gen_reg_rtx (Xmode);
12200 if (!CONST_INT_P (x))
12202 if (mode == Xmode)
12203 return x;
12205 riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
12206 return xmode_reg;
12209 if (mode == Xmode)
12210 emit_move_insn (xmode_reg, x);
12211 else
12213 rtx reg_x = gen_reg_rtx (mode);
12215 emit_move_insn (reg_x, x);
12216 riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x);
12219 return xmode_reg;
12222 /* Implements the unsigned saturation add standard name usadd for int mode.
12224 z = SAT_ADD(x, y).
12226 1. sum = x + y.
12227 2. sum = truncate (sum) for non-Xmode.
12228 3. lt = sum < x.
12229 4. lt = -lt.
12230 5. z = sum | lt. */
12232 void
12233 riscv_expand_usadd (rtx dest, rtx x, rtx y)
12235 machine_mode mode = GET_MODE (dest);
12236 rtx xmode_sum = gen_reg_rtx (Xmode);
12237 rtx xmode_lt = gen_reg_rtx (Xmode);
12238 rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
12239 rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
12240 rtx xmode_dest = gen_reg_rtx (Xmode);
12242 /* Step-1: sum = x + y */
12243 riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
12245 /* Step-1.1: truncate sum for non-Xmode for overflow check. */
12246 if (mode != Xmode)
12248 int shift_bits = GET_MODE_BITSIZE (Xmode)
12249 - GET_MODE_BITSIZE (mode).to_constant ();
12251 gcc_assert (shift_bits > 0);
12253 riscv_emit_binary (ASHIFT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
12254 riscv_emit_binary (LSHIFTRT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
12257 /* Step-2: lt = sum < x */
12258 riscv_emit_binary (LTU, xmode_lt, xmode_sum, xmode_x);
12260 /* Step-3: lt = -lt */
12261 riscv_emit_unary (NEG, xmode_lt, xmode_lt);
12263 /* Step-4: xmode_dest = sum | lt */
12264 riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_sum);
12266 /* Step-5: dest = xmode_dest */
12267 emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
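/* A minimal C sketch of the sequence above, assuming QImode on RV64 and
   <stdint.h> types (the function name is hypothetical; the real code
   emits Xmode RTL):

     uint8_t usadd_sketch (uint8_t x, uint8_t y)
     {
       uint64_t sum = ((uint64_t) x + y) & 0xff;  // steps 1-2
       uint64_t lt  = sum < x;                    // step 3: overflowed?
       return (uint8_t) (sum | -lt);              // steps 4-5: 0xff on overflow
     }
*/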
12270 /* Return a new const RTX of the MAX value for the given mode. Only
12271 scalar int modes are allowed. */
12273 static rtx
12274 riscv_gen_sign_max_cst (machine_mode mode)
12276 switch (mode)
12278 case QImode:
12279 return GEN_INT (INT8_MAX);
12280 case HImode:
12281 return GEN_INT (INT16_MAX);
12282 case SImode:
12283 return GEN_INT (INT32_MAX);
12284 case DImode:
12285 return GEN_INT (INT64_MAX);
12286 default:
12287 gcc_unreachable ();
12291 /* Implements the signed saturation add standard name ssadd for int mode.
12293 z = SAT_ADD(x, y).
12295 1. sum = x + y
12296 2. xor_0 = x ^ y
12297 3. xor_1 = x ^ sum
12298 4. lt = xor_1 < 0
12299 5. ge = xor_0 >= 0
12300 6. and = ge & lt
12301 7. lt = x < 0
12302 8. neg = -lt
12303 9. max = INT_MAX
12304 10. max = max ^ neg
12305 11. neg = -and
12306 12. max = max & neg
12307 13. and = and - 1
12308 14. z = sum & and
12309 15. z = z | max */
12311 void
12312 riscv_expand_ssadd (rtx dest, rtx x, rtx y)
12314 machine_mode mode = GET_MODE (dest);
12315 unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
12316 rtx shift_bits = GEN_INT (bitsize - 1);
12317 rtx xmode_x = gen_lowpart (Xmode, x);
12318 rtx xmode_y = gen_lowpart (Xmode, y);
12319 rtx xmode_sum = gen_reg_rtx (Xmode);
12320 rtx xmode_dest = gen_reg_rtx (Xmode);
12321 rtx xmode_xor_0 = gen_reg_rtx (Xmode);
12322 rtx xmode_xor_1 = gen_reg_rtx (Xmode);
12323 rtx xmode_ge = gen_reg_rtx (Xmode);
12324 rtx xmode_lt = gen_reg_rtx (Xmode);
12325 rtx xmode_neg = gen_reg_rtx (Xmode);
12326 rtx xmode_and = gen_reg_rtx (Xmode);
12327 rtx xmode_max = gen_reg_rtx (Xmode);
12329 /* Step-1: sum = x + y, xor_0 = x ^ y, xor_1 = x ^ sum. */
12330 riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
12331 riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
12332 riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_sum);
12334 /* Step-2: lt = xor_1 < 0, ge = xor_0 >= 0, and = ge & lt. */
12335 riscv_emit_binary (LSHIFTRT, xmode_lt, xmode_xor_1, shift_bits);
12336 riscv_emit_binary (LSHIFTRT, xmode_ge, xmode_xor_0, shift_bits);
12337 riscv_emit_binary (XOR, xmode_ge, xmode_ge, CONST1_RTX (Xmode));
12338 riscv_emit_binary (AND, xmode_and, xmode_lt, xmode_ge);
12339 riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
12341 /* Step-3: lt = x < 0, neg = -lt */
12342 riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
12343 riscv_emit_unary (NEG, xmode_neg, xmode_lt);
12345 /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg */
12346 riscv_emit_move (xmode_max, riscv_gen_sign_max_cst (mode));
12347 riscv_emit_binary (XOR, xmode_max, xmode_max, xmode_neg);
12348 riscv_emit_unary (NEG, xmode_neg, xmode_and);
12349 riscv_emit_binary (AND, xmode_max, xmode_max, xmode_neg);
12351 /* Step-5: and = and - 1, dest = sum & and */
12352 riscv_emit_binary (PLUS, xmode_and, xmode_and, CONSTM1_RTX (Xmode));
12353 riscv_emit_binary (AND, xmode_dest, xmode_sum, xmode_and);
12355 /* Step-6: xmode_dest = xmode_dest | xmode_max, dest = xmode_dest */
12356 riscv_emit_binary (IOR, xmode_dest, xmode_dest, xmode_max);
12357 emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
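/* A minimal branchless C sketch of the sequence above, assuming QImode
   and <stdint.h> types (hypothetical name; the real code emits Xmode RTL):

     int8_t ssadd_sketch (int8_t x, int8_t y)
     {
       int64_t sum = (int64_t) x + y;                            // step 1
       int64_t ovf = ((x ^ y) >= 0) & ((x ^ (int8_t) sum) < 0);  // steps 2-6
       int64_t sat = INT8_MAX ^ -(int64_t) (x < 0);              // steps 7-12
       return (int8_t) ((sum & (ovf - 1)) | (sat & -ovf));       // steps 13-15
     }
*/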
12360 /* Implements the unsigned saturation sub standard name ussub for int mode.
12362 z = SAT_SUB(x, y).
12364 1. minus = x - y.
12365 2. lt = x < y.
12366 3. lt = lt - 1.
12367 4. z = minus & lt. */
12369 void
12370 riscv_expand_ussub (rtx dest, rtx x, rtx y)
12372 machine_mode mode = GET_MODE (dest);
12373 rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
12374 rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
12375 rtx xmode_lt = gen_reg_rtx (Xmode);
12376 rtx xmode_minus = gen_reg_rtx (Xmode);
12377 rtx xmode_dest = gen_reg_rtx (Xmode);
12379 /* Step-1: minus = x - y */
12380 riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
12382 /* Step-2: lt = x < y */
12383 riscv_emit_binary (LTU, xmode_lt, xmode_x, xmode_y);
12385 /* Step-3: lt = lt - 1 (lt + (-1)) */
12386 riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));
12388 /* Step-4: xmode_dest = minus & lt */
12389 riscv_emit_binary (AND, xmode_dest, xmode_lt, xmode_minus);
12391 /* Step-5: dest = xmode_dest */
12392 emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
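/* A minimal C sketch of the sequence above, assuming QImode and
   <stdint.h> types (hypothetical name; the real code emits Xmode RTL):

     uint8_t ussub_sketch (uint8_t x, uint8_t y)
     {
       uint64_t minus = (uint64_t) x - y;    // step 1
       uint64_t lt    = x < y;               // step 2
       return (uint8_t) (minus & (lt - 1));  // steps 3-4: 0 on underflow
     }
*/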
12395 /* Implements the signed saturation sub standard name sssub for int mode.
12397 z = SAT_SUB(x, y).
12399 1. minus = x - y
12400 2. xor_0 = x ^ y
12401 3. xor_1 = x ^ minus
12402 4. lt_0 = xor_1 < 0
12403 5. lt_1 = xor_0 < 0
12404 6. and = lt_0 & lt_1
12405 7. lt = x < 0
12406 8. neg = -lt
12407 9. max = INT_MAX
12408 10. max = max ^ neg
12409 11. neg = -and
12410 12. max = max & neg
12411 13. and = and - 1
12412 14. z = minus & and
12413 15. z = z | max */
12415 void
12416 riscv_expand_sssub (rtx dest, rtx x, rtx y)
12418 machine_mode mode = GET_MODE (dest);
12419 unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
12420 rtx shift_bits = GEN_INT (bitsize - 1);
12421 rtx xmode_x = gen_lowpart (Xmode, x);
12422 rtx xmode_y = gen_lowpart (Xmode, y);
12423 rtx xmode_minus = gen_reg_rtx (Xmode);
12424 rtx xmode_xor_0 = gen_reg_rtx (Xmode);
12425 rtx xmode_xor_1 = gen_reg_rtx (Xmode);
12426 rtx xmode_lt_0 = gen_reg_rtx (Xmode);
12427 rtx xmode_lt_1 = gen_reg_rtx (Xmode);
12428 rtx xmode_and = gen_reg_rtx (Xmode);
12429 rtx xmode_lt = gen_reg_rtx (Xmode);
12430 rtx xmode_neg = gen_reg_rtx (Xmode);
12431 rtx xmode_max = gen_reg_rtx (Xmode);
12432 rtx xmode_dest = gen_reg_rtx (Xmode);
12434 /* Step-1: minus = x - y, xor_0 = x ^ y, xor_1 = x ^ minus. */
12435 riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
12436 riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
12437 riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_minus);
12439 /* Step-2: and = xor_0 < 0 & xor_1 < 0. */
12440 riscv_emit_binary (LSHIFTRT, xmode_lt_0, xmode_xor_0, shift_bits);
12441 riscv_emit_binary (LSHIFTRT, xmode_lt_1, xmode_xor_1, shift_bits);
12442 riscv_emit_binary (AND, xmode_and, xmode_lt_0, xmode_lt_1);
12443 riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
12445 /* Step-3: lt = x < 0, neg = -lt. */
12446 riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
12447 riscv_emit_unary (NEG, xmode_neg, xmode_lt);
12449 /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg. */
12450 riscv_emit_move (xmode_max, riscv_gen_sign_max_cst (mode));
12451 riscv_emit_binary (XOR, xmode_max, xmode_max, xmode_neg);
12452 riscv_emit_unary (NEG, xmode_neg, xmode_and);
12453 riscv_emit_binary (AND, xmode_max, xmode_max, xmode_neg);
12455 /* Step-5: and = and - 1, dest = minus & and. */
12456 riscv_emit_binary (PLUS, xmode_and, xmode_and, CONSTM1_RTX (Xmode));
12457 riscv_emit_binary (AND, xmode_dest, xmode_minus, xmode_and);
12459 /* Step-6: dest = dest | max. */
12460 riscv_emit_binary (IOR, xmode_dest, xmode_dest, xmode_max);
12461 emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
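/* A minimal branchless C sketch of the sequence above, assuming QImode
   and <stdint.h> types (hypothetical name; the real code emits Xmode RTL):

     int8_t sssub_sketch (int8_t x, int8_t y)
     {
       int64_t minus = (int64_t) x - y;                           // step 1
       int64_t ovf = ((x ^ y) < 0) & ((x ^ (int8_t) minus) < 0);  // steps 2-6
       int64_t sat = INT8_MAX ^ -(int64_t) (x < 0);               // steps 7-12
       return (int8_t) ((minus & (ovf - 1)) | (sat & -ovf));      // steps 13-15
     }
*/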
12464 /* Implement the unsigned saturation truncation for int mode.
12466 b = SAT_TRUNC (a);
12468 1. max = the max value of the truncated (narrow) type
12469 2. lt = a < max
12470 3. lt = lt - 1 (0 if a < max, -1 otherwise)
12471 4. d = a | lt
12472 5. b = (trunc)d */
12474 void
12475 riscv_expand_ustrunc (rtx dest, rtx src)
12477 machine_mode mode = GET_MODE (dest);
12478 rtx xmode_max = gen_reg_rtx (Xmode);
12479 unsigned precision = GET_MODE_PRECISION (mode).to_constant ();
12481 gcc_assert (precision < 64);
12483 uint64_t max = ((uint64_t)1u << precision) - 1u;
12484 rtx xmode_src = gen_lowpart (Xmode, src);
12485 rtx xmode_dest = gen_reg_rtx (Xmode);
12486 rtx xmode_lt = gen_reg_rtx (Xmode);
12489 /* Step-1: max = the max value of the narrow type */
12489 emit_move_insn (xmode_max, gen_int_mode (max, Xmode));
12491 /* Step-2: lt = src < max */
12492 riscv_emit_binary (LTU, xmode_lt, xmode_src, xmode_max);
12494 /* Step-3: lt = lt - 1 */
12495 riscv_emit_binary (PLUS, xmode_lt, xmode_lt, CONSTM1_RTX (Xmode));
12497 /* Step-4: xmode_dest = lt | src */
12498 riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_src);
12500 /* Step-5: dest = xmode_dest */
12501 emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
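/* A minimal C sketch of the sequence above for a 16-to-8 bit unsigned
   saturating truncation, using <stdint.h> types (hypothetical name;
   the real code emits Xmode RTL):

     uint8_t ustrunc_sketch (uint16_t a)
     {
       uint64_t lt = (uint64_t) a < 0xff;  // step 2
       uint64_t d  = a | (lt - 1);         // steps 3-4: all-ones if a >= 0xff
       return (uint8_t) d;                 // step 5
     }
*/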
12504 /* Implement the signed saturation truncation for int mode.
12506 b = SAT_TRUNC (a);
12508 1. lt = a < max
12509 2. gt = min < a
12510 3. mask = lt & gt
12511 4. trunc_mask = -mask
12512 5. sat_mask = mask - 1
12513 6. lt = a < 0
12514 7. neg = -lt
12515 8. sat = neg ^ max
12516 9. trunc = src & trunc_mask
12517 10. sat = sat & sat_mask
12518 11. dest = trunc | sat */
12520 void
12521 riscv_expand_sstrunc (rtx dest, rtx src)
12523 machine_mode mode = GET_MODE (dest);
12524 unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant ();
12525 HOST_WIDE_INT narrow_max = ((int64_t)1 << (narrow_prec - 1)) - 1; /* e.g. 127 for QImode */
12526 HOST_WIDE_INT narrow_min = -narrow_max - 1; /* e.g. -128 for QImode */
12528 rtx xmode_narrow_max = gen_reg_rtx (Xmode);
12529 rtx xmode_narrow_min = gen_reg_rtx (Xmode);
12530 rtx xmode_lt = gen_reg_rtx (Xmode);
12531 rtx xmode_gt = gen_reg_rtx (Xmode);
12532 rtx xmode_src = gen_lowpart (Xmode, src);
12533 rtx xmode_dest = gen_reg_rtx (Xmode);
12534 rtx xmode_mask = gen_reg_rtx (Xmode);
12535 rtx xmode_sat = gen_reg_rtx (Xmode);
12536 rtx xmode_trunc = gen_reg_rtx (Xmode);
12537 rtx xmode_sat_mask = gen_reg_rtx (Xmode);
12538 rtx xmode_trunc_mask = gen_reg_rtx (Xmode);
12540 /* Step-1: lt = src < max, gt = min < src, mask = lt & gt */
12541 emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode));
12542 emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode));
12543 riscv_emit_binary (LT, xmode_lt, xmode_src, xmode_narrow_max);
12544 riscv_emit_binary (LT, xmode_gt, xmode_narrow_min, xmode_src);
12545 riscv_emit_binary (AND, xmode_mask, xmode_lt, xmode_gt);
12547 /* Step-2: sat_mask = mask - 1, trunc_mask = ~mask */
12548 riscv_emit_binary (PLUS, xmode_sat_mask, xmode_mask, CONSTM1_RTX (Xmode));
12549 riscv_emit_unary (NEG, xmode_trunc_mask, xmode_mask);
12551 /* Step-3: lt = src < 0, lt = -lt, sat = lt ^ narrow_max */
12552 riscv_emit_binary (LT, xmode_lt, xmode_src, CONST0_RTX (Xmode));
12553 riscv_emit_unary (NEG, xmode_lt, xmode_lt);
12554 riscv_emit_binary (XOR, xmode_sat, xmode_lt, xmode_narrow_max);
12556 /* Step-4: xmode_dest = (src & trunc_mask) | (sat & sat_mask) */
12557 riscv_emit_binary (AND, xmode_trunc, xmode_src, xmode_trunc_mask);
12558 riscv_emit_binary (AND, xmode_sat, xmode_sat, xmode_sat_mask);
12559 riscv_emit_binary (IOR, xmode_dest, xmode_trunc, xmode_sat);
12561 /* Step-5: dest = xmode_dest */
12562 emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
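/* A minimal branchless C sketch of the sequence above for a 16-to-8 bit
   signed saturating truncation, using <stdint.h> types (hypothetical
   name; the real code emits Xmode RTL):

     int8_t sstrunc_sketch (int16_t a)
     {
       int64_t in_range = (a < INT8_MAX) & (INT8_MIN < a);  // steps 1-3
       int64_t sat      = -(int64_t) (a < 0) ^ INT8_MAX;    // steps 6-8
       return (int8_t) ((a & -in_range)                     // steps 4, 9
                        | (sat & (in_range - 1)));          // steps 5, 10-11
     }
*/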
12565 /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for
12566 TI_LONG_DOUBLE_TYPE which is for long double type, go with the
12567 default one for the others. */
12569 static machine_mode
12570 riscv_c_mode_for_floating_type (enum tree_index ti)
12572 if (ti == TI_LONG_DOUBLE_TYPE)
12573 return TFmode;
12574 return default_mode_for_floating_type (ti);
12577 /* On riscv we have an ABI defined safe buffer. This constant is used to
12578 determine the probe offset for alloca. */
12580 static HOST_WIDE_INT
12581 riscv_stack_clash_protection_alloca_probe_range (void)
12583 return STACK_CLASH_CALLER_GUARD;
12586 /* Initialize the GCC target structure. */
12587 #undef TARGET_ASM_ALIGNED_HI_OP
12588 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
12589 #undef TARGET_ASM_ALIGNED_SI_OP
12590 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
12591 #undef TARGET_ASM_ALIGNED_DI_OP
12592 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
12594 #undef TARGET_OPTION_OVERRIDE
12595 #define TARGET_OPTION_OVERRIDE riscv_option_override
12597 #undef TARGET_OPTION_RESTORE
12598 #define TARGET_OPTION_RESTORE riscv_option_restore
12600 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
12601 #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p
12603 #undef TARGET_LEGITIMIZE_ADDRESS
12604 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address
12606 #undef TARGET_SCHED_ISSUE_RATE
12607 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
12608 #undef TARGET_SCHED_MACRO_FUSION_P
12609 #define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
12610 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
12611 #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
12613 #undef TARGET_SCHED_VARIABLE_ISSUE
12614 #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue
12616 #undef TARGET_SCHED_ADJUST_COST
12617 #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
12619 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
12620 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
12622 #undef TARGET_SET_CURRENT_FUNCTION
12623 #define TARGET_SET_CURRENT_FUNCTION riscv_set_current_function
12625 #undef TARGET_REGISTER_MOVE_COST
12626 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
12627 #undef TARGET_MEMORY_MOVE_COST
12628 #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
12629 #undef TARGET_RTX_COSTS
12630 #define TARGET_RTX_COSTS riscv_rtx_costs
12631 #undef TARGET_ADDRESS_COST
12632 #define TARGET_ADDRESS_COST riscv_address_cost
12633 #undef TARGET_INSN_COST
12634 #define TARGET_INSN_COST riscv_insn_cost
12636 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
12637 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST riscv_max_noce_ifcvt_seq_cost
12638 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
12639 #define TARGET_NOCE_CONVERSION_PROFITABLE_P riscv_noce_conversion_profitable_p
12641 #undef TARGET_ASM_FILE_START
12642 #define TARGET_ASM_FILE_START riscv_file_start
12643 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
12644 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
12645 #undef TARGET_ASM_FILE_END
12646 #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
12648 #undef TARGET_EXPAND_BUILTIN_VA_START
12649 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start
12651 #undef TARGET_PROMOTE_FUNCTION_MODE
12652 #define TARGET_PROMOTE_FUNCTION_MODE riscv_promote_function_mode
12654 #undef TARGET_RETURN_IN_MEMORY
12655 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory
12657 #undef TARGET_ASM_OUTPUT_MI_THUNK
12658 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
12659 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
12660 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
12662 #undef TARGET_PRINT_OPERAND
12663 #define TARGET_PRINT_OPERAND riscv_print_operand
12664 #undef TARGET_PRINT_OPERAND_ADDRESS
12665 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
12666 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
12667 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
12669 #undef TARGET_SETUP_INCOMING_VARARGS
12670 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
12671 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
12672 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS riscv_allocate_stack_slots_for_args
12673 #undef TARGET_STRICT_ARGUMENT_NAMING
12674 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
12675 #undef TARGET_MUST_PASS_IN_STACK
12676 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
12677 #undef TARGET_PASS_BY_REFERENCE
12678 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
12679 #undef TARGET_ARG_PARTIAL_BYTES
12680 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
12681 #undef TARGET_FUNCTION_ARG
12682 #define TARGET_FUNCTION_ARG riscv_function_arg
12683 #undef TARGET_FUNCTION_ARG_ADVANCE
12684 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
12685 #undef TARGET_FUNCTION_ARG_BOUNDARY
12686 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
12687 #undef TARGET_FNTYPE_ABI
12688 #define TARGET_FNTYPE_ABI riscv_fntype_abi
12689 #undef TARGET_INSN_CALLEE_ABI
12690 #define TARGET_INSN_CALLEE_ABI riscv_insn_callee_abi
12692 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
12693 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
12694 riscv_get_separate_components
12696 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
12697 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
12698 riscv_components_for_bb
12700 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
12701 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
12702 riscv_disqualify_components
12704 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
12705 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
12706 riscv_emit_prologue_components
12708 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
12709 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
12710 riscv_emit_epilogue_components
12712 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
12713 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
12714 riscv_set_handled_components
12716 /* The generic ELF target does not always have TLS support. */
12717 #ifdef HAVE_AS_TLS
12718 #undef TARGET_HAVE_TLS
12719 #define TARGET_HAVE_TLS true
12720 #endif
12722 #undef TARGET_CANNOT_FORCE_CONST_MEM
12723 #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem
12725 #undef TARGET_LEGITIMATE_CONSTANT_P
12726 #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p
12728 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
12729 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P riscv_use_blocks_for_constant_p
12731 #undef TARGET_LEGITIMATE_ADDRESS_P
12732 #define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p
12734 #undef TARGET_CAN_INLINE_P
12735 #define TARGET_CAN_INLINE_P riscv_can_inline_p
12737 #undef TARGET_CAN_ELIMINATE
12738 #define TARGET_CAN_ELIMINATE riscv_can_eliminate
12740 #undef TARGET_CONDITIONAL_REGISTER_USAGE
12741 #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage
12743 #undef TARGET_CLASS_MAX_NREGS
12744 #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs
12746 #undef TARGET_TRAMPOLINE_INIT
12747 #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init
12749 #undef TARGET_IN_SMALL_DATA_P
12750 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p
12752 #undef TARGET_HAVE_SRODATA_SECTION
12753 #define TARGET_HAVE_SRODATA_SECTION true
12755 #undef TARGET_ASM_SELECT_SECTION
12756 #define TARGET_ASM_SELECT_SECTION riscv_select_section
12758 #undef TARGET_ASM_UNIQUE_SECTION
12759 #define TARGET_ASM_UNIQUE_SECTION riscv_unique_section
12761 #undef TARGET_ASM_SELECT_RTX_SECTION
12762 #define TARGET_ASM_SELECT_RTX_SECTION riscv_elf_select_rtx_section
12764 #undef TARGET_MIN_ANCHOR_OFFSET
12765 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
12767 #undef TARGET_MAX_ANCHOR_OFFSET
12768 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
12770 #undef TARGET_REGISTER_PRIORITY
12771 #define TARGET_REGISTER_PRIORITY riscv_register_priority
12773 #undef TARGET_CANNOT_COPY_INSN_P
12774 #define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p
12776 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
12777 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv
12779 #undef TARGET_INIT_BUILTINS
12780 #define TARGET_INIT_BUILTINS riscv_init_builtins
12782 #undef TARGET_BUILTIN_DECL
12783 #define TARGET_BUILTIN_DECL riscv_builtin_decl
12785 #undef TARGET_GIMPLE_FOLD_BUILTIN
12786 #define TARGET_GIMPLE_FOLD_BUILTIN riscv_gimple_fold_builtin
12788 #undef TARGET_EXPAND_BUILTIN
12789 #define TARGET_EXPAND_BUILTIN riscv_expand_builtin
12791 #undef TARGET_HARD_REGNO_NREGS
12792 #define TARGET_HARD_REGNO_NREGS riscv_hard_regno_nregs
12793 #undef TARGET_HARD_REGNO_MODE_OK
12794 #define TARGET_HARD_REGNO_MODE_OK riscv_hard_regno_mode_ok
12796 #undef TARGET_MODES_TIEABLE_P
12797 #define TARGET_MODES_TIEABLE_P riscv_modes_tieable_p
12799 #undef TARGET_SLOW_UNALIGNED_ACCESS
12800 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
12802 #undef TARGET_OVERLAP_OP_BY_PIECES_P
12803 #define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
12805 #undef TARGET_SECONDARY_MEMORY_NEEDED
12806 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
12808 #undef TARGET_CAN_CHANGE_MODE_CLASS
12809 #define TARGET_CAN_CHANGE_MODE_CLASS riscv_can_change_mode_class
12811 #undef TARGET_CONSTANT_ALIGNMENT
12812 #define TARGET_CONSTANT_ALIGNMENT riscv_constant_alignment
12814 #undef TARGET_MERGE_DECL_ATTRIBUTES
12815 #define TARGET_MERGE_DECL_ATTRIBUTES riscv_merge_decl_attributes
12817 #undef TARGET_ATTRIBUTE_TABLE
12818 #define TARGET_ATTRIBUTE_TABLE riscv_attribute_table
12820 #undef TARGET_WARN_FUNC_RETURN
12821 #define TARGET_WARN_FUNC_RETURN riscv_warn_func_return
12823 /* The low bit is ignored by jump instructions so is safe to use. */
12824 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
12825 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
12827 #undef TARGET_MACHINE_DEPENDENT_REORG
12828 #define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg
12830 #undef TARGET_NEW_ADDRESS_PROFITABLE_P
12831 #define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p
12833 #undef TARGET_MANGLE_TYPE
12834 #define TARGET_MANGLE_TYPE riscv_mangle_type
12836 #undef TARGET_SCALAR_MODE_SUPPORTED_P
12837 #define TARGET_SCALAR_MODE_SUPPORTED_P riscv_scalar_mode_supported_p
12839 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
12840 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
12841 riscv_libgcc_floating_mode_supported_p
12843 #undef TARGET_INIT_LIBFUNCS
12844 #define TARGET_INIT_LIBFUNCS riscv_init_libfuncs
12846 #undef TARGET_C_EXCESS_PRECISION
12847 #define TARGET_C_EXCESS_PRECISION riscv_excess_precision
12849 #undef TARGET_FLOATN_MODE
12850 #define TARGET_FLOATN_MODE riscv_floatn_mode
12852 #undef TARGET_ASAN_SHADOW_OFFSET
12853 #define TARGET_ASAN_SHADOW_OFFSET riscv_asan_shadow_offset
12855 #ifdef TARGET_BIG_ENDIAN_DEFAULT
12856 #undef TARGET_DEFAULT_TARGET_FLAGS
12857 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_ENDIAN)
12858 #endif
12860 #undef TARGET_VECTOR_MODE_SUPPORTED_P
12861 #define TARGET_VECTOR_MODE_SUPPORTED_P riscv_vector_mode_supported_p
12863 #undef TARGET_VERIFY_TYPE_CONTEXT
12864 #define TARGET_VERIFY_TYPE_CONTEXT riscv_verify_type_context
12866 #undef TARGET_ESTIMATED_POLY_VALUE
12867 #define TARGET_ESTIMATED_POLY_VALUE riscv_estimated_poly_value
12869 #undef TARGET_VECTORIZE_GET_MASK_MODE
12870 #define TARGET_VECTORIZE_GET_MASK_MODE riscv_get_mask_mode
12872 #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
12873 #define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE riscv_empty_mask_is_expensive
12875 #undef TARGET_VECTOR_ALIGNMENT
12876 #define TARGET_VECTOR_ALIGNMENT riscv_vector_alignment
12878 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
12879 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT riscv_support_vector_misalignment
12881 #undef TARGET_DWARF_POLY_INDETERMINATE_VALUE
12882 #define TARGET_DWARF_POLY_INDETERMINATE_VALUE riscv_dwarf_poly_indeterminate_value
12884 #undef TARGET_ZERO_CALL_USED_REGS
12885 #define TARGET_ZERO_CALL_USED_REGS riscv_zero_call_used_regs
12887 #undef TARGET_ARRAY_MODE
12888 #define TARGET_ARRAY_MODE riscv_array_mode
12890 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
12891 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE riscv_preferred_simd_mode
12893 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
12894 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
12895 riscv_vectorize_preferred_vector_alignment
12897 #undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
12898 #define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
12899 riscv_stack_clash_protection_alloca_probe_range
12901 /* Mode switching hooks. */
12903 #undef TARGET_MODE_EMIT
12904 #define TARGET_MODE_EMIT riscv_emit_mode_set
12905 #undef TARGET_MODE_NEEDED
12906 #define TARGET_MODE_NEEDED riscv_mode_needed
12907 #undef TARGET_MODE_AFTER
12908 #define TARGET_MODE_AFTER riscv_mode_after
12909 #undef TARGET_MODE_ENTRY
12910 #define TARGET_MODE_ENTRY riscv_mode_entry
12911 #undef TARGET_MODE_EXIT
12912 #define TARGET_MODE_EXIT riscv_mode_exit
12913 #undef TARGET_MODE_PRIORITY
12914 #define TARGET_MODE_PRIORITY riscv_mode_priority
12916 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
12917 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
12918 riscv_autovectorize_vector_modes
12920 #undef TARGET_VECTORIZE_RELATED_MODE
12921 #define TARGET_VECTORIZE_RELATED_MODE riscv_vectorize_related_mode
12923 #undef TARGET_VECTORIZE_VEC_PERM_CONST
12924 #define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const
12926 #undef TARGET_FRAME_POINTER_REQUIRED
12927 #define TARGET_FRAME_POINTER_REQUIRED riscv_frame_pointer_required
12929 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
12930 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
12931 riscv_builtin_vectorization_cost
12933 #undef TARGET_VECTORIZE_CREATE_COSTS
12934 #define TARGET_VECTORIZE_CREATE_COSTS riscv_vectorize_create_costs
12936 #undef TARGET_PREFERRED_ELSE_VALUE
12937 #define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value
12939 #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
12940 #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
12942 #undef TARGET_FUNCTION_VALUE_REGNO_P
12943 #define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
12945 #undef TARGET_GET_RAW_RESULT_MODE
12946 #define TARGET_GET_RAW_RESULT_MODE riscv_get_raw_result_mode
12948 #undef TARGET_C_MODE_FOR_FLOATING_TYPE
12949 #define TARGET_C_MODE_FOR_FLOATING_TYPE riscv_c_mode_for_floating_type
12951 struct gcc_target targetm = TARGET_INITIALIZER;
12953 #include "gt-riscv.h"