1 /* Target Definitions for NVPTX.
2 Copyright (C) 2014-2025 Free Software Foundation, Inc.
3 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
25 #include "config/nvptx/nvptx-opts.h"
28 /* Run-time Target. */
/* Use '--with-arch' for default '-misa'.  The expansion is one initializer
   entry followed by a comma.  Its last line carries no line-continuation
   backslash, so the definition ends here regardless of what follows.  */
#define OPTION_DEFAULT_SPECS \
  { "arch", "%{!misa=*:-misa=%(VALUE)}" },
34 /* Assembler supports '-v' option; handle similar to
35 '../../gcc.cc:asm_options', 'HAVE_GNU_AS'. */
36 #define ASM_SPEC "%{v}"
38 #define STARTFILE_SPEC "%{mmainkernel:crt0.o%s}"
40 #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins ()
42 /* Avoid the default in ../../gcc.cc, which adds "-pthread", which is not
43 supported for nvptx. */
44 #define GOMP_SELF_SPECS ""
48 #define BITS_BIG_ENDIAN 0
49 #define BYTES_BIG_ENDIAN 0
50 #define WORDS_BIG_ENDIAN 0
52 /* Chosen such that we won't have to deal with multi-word subregs. */
53 #define UNITS_PER_WORD 8
55 /* Alignments in bits. */
56 #define PARM_BOUNDARY 32
57 #define STACK_BOUNDARY 128
58 #define FUNCTION_BOUNDARY 32
59 #define BIGGEST_ALIGNMENT 128
60 #define STRICT_ALIGNMENT 1
62 #define MAX_STACK_ALIGNMENT (1024 * 8)
64 #define DATA_ALIGNMENT nvptx_data_alignment
66 /* Copied from elf.h and other places. We'd otherwise use
67 BIGGEST_ALIGNMENT and fail a number of testcases. */
68 #define MAX_OFILE_ALIGNMENT (32768 * 8)
72 #define DEFAULT_SIGNED_CHAR 1
74 #define SHORT_TYPE_SIZE 16
75 #define INT_TYPE_SIZE 32
76 #define LONG_TYPE_SIZE (TARGET_ABI64 ? 64 : 32)
77 #define LONG_LONG_TYPE_SIZE 64
78 #define TARGET_SUPPORTS_WIDE_INT 1
81 #define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int")
83 #define PTRDIFF_TYPE (TARGET_ABI64 ? "long int" : "int")
85 #define POINTER_SIZE (TARGET_ABI64 ? 64 : 32)
86 #define Pmode (TARGET_ABI64 ? DImode : SImode)
87 #define STACK_SIZE_MODE Pmode
/* We always have to maintain the '-msoft-stack' pointer, but the PTX "native"
   stack pointer is handled implicitly at function level.
   NOTE(review): the final alternative of this conditional was missing, which
   left the expression and its parentheses unterminated.  Pmode is the mode
   GCC assumes when a target does not define this macro -- confirm that it is
   the intended mode for the remaining save levels.  */
#define STACK_SAVEAREA_MODE(LEVEL) \
  (TARGET_SOFT_STACK ? Pmode \
   : ((LEVEL) == SAVE_FUNCTION ? VOIDmode \
      : Pmode))
96 #include "nvptx-gen.h"
/* Our baseline is PTX ISA Version 3.1, so there are no 'TARGET_PTX_3_1' and
   smaller conditionals.  Each conditional below is true when
   'ptx_version_option' is at least the version the conditional names.  */
#define NVPTX_PTX_ISA_AT_LEAST(VERSION) (ptx_version_option >= (VERSION))
#define TARGET_PTX_4_1 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_4_1)
#define TARGET_PTX_4_2 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_4_2)
#define TARGET_PTX_6_0 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_6_0)
#define TARGET_PTX_6_3 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_6_3)
#define TARGET_PTX_7_0 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_7_0)
#define TARGET_PTX_7_3 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_7_3)
#define TARGET_PTX_7_8 NVPTX_PTX_ISA_AT_LEAST (PTX_VERSION_7_8)
/* Registers.  Since ptx is a virtual target, we just define a few
   hard registers for special purposes and leave pseudos unallocated.
   We have to have some available hard registers, to keep gcc setup
   happy.  */
112 #define FIRST_PSEUDO_REGISTER 16
113 #define FIXED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
114 #define CALL_USED_REGISTERS { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
116 /* Register Classes. */
/* NO_REGS is the empty class and ALL_REGS holds every register (see
   REG_CLASS_CONTENTS); LIM_REG_CLASSES is not a class, it only counts
   them.  */
enum reg_class
{
  NO_REGS,
  ALL_REGS,
  LIM_REG_CLASSES
};
118 #define REG_CLASS_NAMES { "NO_REGS", "ALL_REGS" }
119 #define REG_CLASS_CONTENTS { { 0x0000 }, { 0xFFFF } }
120 #define N_REG_CLASSES (int) LIM_REG_CLASSES
122 #define GENERAL_REGS ALL_REGS
123 #define REGNO_REG_CLASS(R) ((void)(R), ALL_REGS)
124 #define BASE_REG_CLASS ALL_REGS
125 #define INDEX_REG_CLASS NO_REGS
127 #define REGNO_OK_FOR_BASE_P(X) true
128 #define REGNO_OK_FOR_INDEX_P(X) false
/* Number of words needed for a value of mode MODE: GET_MODE_SIZE (MODE)
   divided by UNITS_PER_WORD, rounded up.  RCLASS does not take part in the
   computation.  */
#define CLASS_MAX_NREGS(RCLASS, MODE) \
  ((GET_MODE_SIZE (MODE) + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD)
133 #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
134 if ((MODE) == QImode || (MODE) == HImode) \
141 /* Stack and Calling. */
143 #define FRAME_GROWS_DOWNWARD 0
144 #define STACK_GROWS_DOWNWARD 1
146 #define NVPTX_RETURN_REGNUM 0
147 #define STACK_POINTER_REGNUM 1
148 #define FRAME_POINTER_REGNUM 2
149 #define ARG_POINTER_REGNUM 3
150 #define STATIC_CHAIN_REGNUM 4
151 /* This register points to the shared memory location with the current warp's
152 soft stack pointer (__nvptx_stacks[tid.y]). */
153 #define SOFTSTACK_SLOT_REGNUM 5
154 /* This register is used to save the previous value of the soft stack pointer
155 in the prologue and restore it when returning. */
156 #define SOFTSTACK_PREV_REGNUM 6
/* Assembler names of the hard registers, indexed by register number.  The
   expansion is a brace-enclosed initializer with one string per hard
   register (FIRST_PSEUDO_REGISTER entries), and its last line carries no
   line-continuation backslash.  */
#define REGISTER_NAMES \
  { \
    "%value", "%stack", "%frame", "%args", \
    "%chain", "%sspslot", "%sspprev", "%hr7", \
    "%hr8", "%hr9", "%hr10", "%hr11", "%hr12", "%hr13", "%hr14", "%hr15" \
  }
165 #define FIRST_PARM_OFFSET(FNDECL) ((void)(FNDECL), 0)
166 #define PUSH_ARGS_REVERSED 1
167 #define ACCUMULATE_OUTGOING_ARGS 1
169 /* Avoid using the argument pointer for frame-related things. */
170 #define FRAME_POINTER_CFA_OFFSET(FNDECL) ((void)(FNDECL), 0)
175 /* Number of arguments passed in registers so far. */
180 #define CUMULATIVE_ARGS struct nvptx_args
182 #define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, FNDECL, N_NAMED_ARGS) \
183 ((CUM).fntype = (FNTYPE), (CUM).count = 0, (void)0)
185 #define FUNCTION_ARG_REGNO_P(r) 0
187 #define DEFAULT_PCC_STRUCT_RETURN 0
189 #define FUNCTION_PROFILER(file, labelno) \
190 fatal_error (input_location, \
191 "profiling is not yet implemented for this architecture")
193 #define TRAMPOLINE_SIZE 32
194 #define TRAMPOLINE_ALIGNMENT 256
/* We don't run reload, so this isn't actually used, but it still needs to be
   defined.  Showing an argp->fp elimination also stops
   expand_builtin_setjmp_receiver from generating invalid insns.  The
   expansion is a brace-enclosed list of { FROM, TO } register pairs, and its
   last line carries no line-continuation backslash.  */
#define ELIMINABLE_REGS \
  { \
    { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM } \
  }
204 /* Define the offset between two registers, one to be eliminated, and the other
205 its replacement, at the start of a routine. */
207 #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
210 /* Addressing Modes. */
212 #define MAX_REGS_PER_ADDRESS 1
214 #define LEGITIMATE_PIC_OPERAND_P(X) 1
217 #if defined HOST_WIDE_INT
218 struct GTY(()) machine_function
220 rtx_expr_list
*call_args
; /* Arg list for the current call. */
221 bool doing_call
; /* Within a CALL_ARGS ... CALL_ARGS_END sequence. */
222 bool is_variadic
; /* This call is variadic */
223 bool has_variadic
; /* Current function has a variadic call. */
224 bool has_chain
; /* Current function has outgoing static chain. */
225 bool has_softstack
; /* Current function has a soft stack frame. */
226 bool has_simtreg
; /* Current function has an OpenMP SIMD region. */
227 int num_args
; /* Number of args of current call. */
228 int return_mode
; /* Return mode of current fn.
229 (machine_mode not defined yet.) */
230 rtx axis_predicate
[2]; /* Neutering predicates. */
231 int axis_dim
[2]; /* Maximum number of threads on each axis, dim[0] is
232 vector_length, dim[1] is num_workers. */
233 bool axis_dim_init_p
;
234 rtx bcast_partition
; /* Register containing the size of each
235 vector's partition of share-memory used to
237 rtx red_partition
; /* Similar to bcast_partition, except for vector
239 rtx sync_bar
; /* Synchronization barrier ID for vectors. */
240 rtx unisimt_master
; /* 'Master lane index' for -muniform-simt. */
241 rtx unisimt_predicate
; /* Predicate for -muniform-simt. */
242 rtx unisimt_outside_simt_predicate
; /* Predicate for -muniform-simt. */
243 rtx unisimt_location
; /* Mask location for -muniform-simt. */
244 /* The following two fields hold the maximum size resp. alignment required
245 for per-lane storage in OpenMP SIMD regions. */
246 unsigned HOST_WIDE_INT simt_stack_size
;
247 unsigned HOST_WIDE_INT simt_stack_align
;
253 #define NO_FUNCTION_CSE 1
254 #define SLOW_BYTE_ACCESS 0
255 #define BRANCH_COST(speed_p, predictable_p) 6
257 /* Assembler Format. */
259 #undef ASM_DECLARE_FUNCTION_NAME
260 #define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
261 nvptx_declare_function_name (FILE, NAME, DECL)
263 #undef ASM_DECLARE_FUNCTION_SIZE
264 #define ASM_DECLARE_FUNCTION_SIZE(STREAM, NAME, DECL) \
265 nvptx_function_end (STREAM)
267 #define DWARF2_ASM_LINE_DEBUG_INFO 1
270 #define ASM_APP_ON "\t// #APP \n"
272 #define ASM_APP_OFF "\t// #NO_APP \n"
/* The debugger register number is the register number itself.  The argument
   is parenthesized so that an expression argument keeps its value when the
   expansion is used inside a larger expression.  */
#define DEBUGGER_REGNO(N) (N)
276 #define TEXT_SECTION_ASM_OP ""
277 #define DATA_SECTION_ASM_OP ""
279 #undef ASM_GENERATE_INTERNAL_LABEL
280 #define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
284 __p = stpcpy (&(LABEL)[1], PREFIX); \
286 sprint_ul (__p, (unsigned long) (NUM)); \
290 #define ASM_OUTPUT_ALIGN(FILE, POWER) \
298 #define ASM_OUTPUT_SKIP(FILE, N) \
299 nvptx_output_skip (FILE, N)
/* Forward to nvptx_output_ascii.  The expansion is a plain call expression
   with no trailing semicolon: the caller writes the semicolon, so the macro
   can be the body of an 'if' that is followed by 'else'.  */
#undef ASM_OUTPUT_ASCII
#define ASM_OUTPUT_ASCII(FILE, STR, LENGTH) \
  nvptx_output_ascii (FILE, STR, LENGTH)
305 #define ASM_DECLARE_OBJECT_NAME(FILE, NAME, DECL) \
306 nvptx_declare_object_name (FILE, NAME, DECL)
308 #undef ASM_OUTPUT_ALIGNED_DECL_COMMON
309 #define ASM_OUTPUT_ALIGNED_DECL_COMMON(FILE, DECL, NAME, SIZE, ALIGN) \
310 nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN)
312 #undef ASM_OUTPUT_ALIGNED_DECL_LOCAL
313 #define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
314 nvptx_output_aligned_decl (FILE, NAME, DECL, SIZE, ALIGN)
316 #define CASE_VECTOR_PC_RELATIVE flag_pic
317 #define JUMP_TABLES_IN_TEXT_SECTION flag_pic
319 #define ADDR_VEC_ALIGN(VEC) (JUMP_TABLES_IN_TEXT_SECTION ? 5 : 2)
323 #define DWARF2_LINENO_DEBUGGING_INFO 1
/* Both macros store GET_MODE_BITSIZE (MODE) into VALUE and evaluate
   to 2.  */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
  ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
330 #define SUPPORTS_WEAK 1
332 #define MAKE_DECL_ONE_ONLY(DECL) \
333 (DECL_WEAK (DECL) = 1)
335 /* The documentation states that ASM_OUTPUT_DEF_FROM_DECLS is used in
336 preference to ASM_OUTPUT_DEF if the tree nodes are available. However, we
337 need the tree nodes to emit the prototype, so at this point it's not clear
338 how we can support ASM_OUTPUT_DEF. Still, we need to define it, or
339 ASM_OUTPUT_DEF_FROM_DECLS is ignored. For now, assert, and once we run
340 into it possibly improve by somehow emitting the prototype elsewhere, or
341 emitting a reasonable error message. */
342 #define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
348 gcc_unreachable (); \
351 #define ASM_OUTPUT_DEF_FROM_DECLS(STREAM, NAME, VALUE) \
352 nvptx_asm_output_def_from_decls (STREAM, NAME, VALUE)
354 /* ..., but also override other macros to avoid 'gcc/defaults.h'-initialization
355 due to that dummy 'ASM_OUTPUT_DEF'. */
356 #define TARGET_USE_LOCAL_THUNK_ALIAS_P(DECL) TARGET_SUPPORTS_ALIASES
357 #define TARGET_SUPPORTS_ALIASES (nvptx_alias != 0)
359 #define NO_DOT_IN_LABEL
360 #define ASM_COMMENT_START "//"
362 #define STORE_FLAG_VALUE 1
363 #define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE))
365 #define CASE_VECTOR_MODE SImode
367 #define MOVE_RATIO(SPEED) 4
368 #define FUNCTION_MODE QImode
370 /* Implement global constructor, destructor support in a conceptually simpler
371 way than using 'collect2' (the program): implement the respective
372 functionality in the nvptx-tools 'ld'. This however still requires the
373 compiler-side effects corresponding to 'USE_COLLECT2': the global
374 constructor, destructor support functions need to have external linkage, and
375 therefore names that are "unique across the whole link". Use
376 '!targetm.have_ctors_dtors' to achieve this (..., and thus don't need to
377 provide 'targetm.asm_out.constructor', 'targetm.asm_out.destructor'). */
378 #define TARGET_HAVE_CTORS_DTORS false
380 /* See 'libgcc/config/nvptx/crt0.c' for wrapping of 'main'. */
381 #define HAS_INIT_SECTION
383 /* The C++ front end insists to link against libstdc++ -- which we don't build.
384 Tell it to instead link against the innocuous libgcc. */
385 #define LIBSTDCXX "gcc"
387 #endif /* GCC_NVPTX_H */