gcc/config/aarch64/aarch64-builtins.cc
1 /* Builtins' description for AArch64 SIMD architecture.
2 Copyright (C) 2011-2025 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "ssa.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "explow.h"
42 #include "expr.h"
43 #include "langhooks.h"
44 #include "gimple-iterator.h"
45 #include "case-cfn-macros.h"
46 #include "emit-rtl.h"
47 #include "stringpool.h"
48 #include "attribs.h"
49 #include "gimple-fold.h"
50 #include "builtins.h"
51 #include "aarch64-builtins.h"
53 using namespace aarch64;
55 #define v8qi_UP E_V8QImode
56 #define v8di_UP E_V8DImode
57 #define v4hi_UP E_V4HImode
58 #define v4hf_UP E_V4HFmode
59 #define v2si_UP E_V2SImode
60 #define v2sf_UP E_V2SFmode
61 #define v1df_UP E_V1DFmode
62 #define v1di_UP E_V1DImode
63 #define di_UP E_DImode
64 #define df_UP E_DFmode
65 #define v16qi_UP E_V16QImode
66 #define v8hi_UP E_V8HImode
67 #define v8hf_UP E_V8HFmode
68 #define v4si_UP E_V4SImode
69 #define v4sf_UP E_V4SFmode
70 #define v2di_UP E_V2DImode
71 #define v2df_UP E_V2DFmode
72 #define ti_UP E_TImode
73 #define oi_UP E_OImode
74 #define ci_UP E_CImode
75 #define xi_UP E_XImode
76 #define si_UP E_SImode
77 #define sf_UP E_SFmode
78 #define hi_UP E_HImode
79 #define hf_UP E_HFmode
80 #define qi_UP E_QImode
81 #define bf_UP E_BFmode
82 #define v4bf_UP E_V4BFmode
83 #define v8bf_UP E_V8BFmode
84 #define v2x8qi_UP E_V2x8QImode
85 #define v2x4hi_UP E_V2x4HImode
86 #define v2x4hf_UP E_V2x4HFmode
87 #define v2x4bf_UP E_V2x4BFmode
88 #define v2x2si_UP E_V2x2SImode
89 #define v2x2sf_UP E_V2x2SFmode
90 #define v2x1di_UP E_V2x1DImode
91 #define v2x1df_UP E_V2x1DFmode
92 #define v2x16qi_UP E_V2x16QImode
93 #define v2x8hi_UP E_V2x8HImode
94 #define v2x8hf_UP E_V2x8HFmode
95 #define v2x8bf_UP E_V2x8BFmode
96 #define v2x4si_UP E_V2x4SImode
97 #define v2x4sf_UP E_V2x4SFmode
98 #define v2x2di_UP E_V2x2DImode
99 #define v2x2df_UP E_V2x2DFmode
100 #define v3x8qi_UP E_V3x8QImode
101 #define v3x4hi_UP E_V3x4HImode
102 #define v3x4hf_UP E_V3x4HFmode
103 #define v3x4bf_UP E_V3x4BFmode
104 #define v3x2si_UP E_V3x2SImode
105 #define v3x2sf_UP E_V3x2SFmode
106 #define v3x1di_UP E_V3x1DImode
107 #define v3x1df_UP E_V3x1DFmode
108 #define v3x16qi_UP E_V3x16QImode
109 #define v3x8hi_UP E_V3x8HImode
110 #define v3x8hf_UP E_V3x8HFmode
111 #define v3x8bf_UP E_V3x8BFmode
112 #define v3x4si_UP E_V3x4SImode
113 #define v3x4sf_UP E_V3x4SFmode
114 #define v3x2di_UP E_V3x2DImode
115 #define v3x2df_UP E_V3x2DFmode
116 #define v4x8qi_UP E_V4x8QImode
117 #define v4x4hi_UP E_V4x4HImode
118 #define v4x4hf_UP E_V4x4HFmode
119 #define v4x4bf_UP E_V4x4BFmode
120 #define v4x2si_UP E_V4x2SImode
121 #define v4x2sf_UP E_V4x2SFmode
122 #define v4x1di_UP E_V4x1DImode
123 #define v4x1df_UP E_V4x1DFmode
124 #define v4x16qi_UP E_V4x16QImode
125 #define v4x8hi_UP E_V4x8HImode
126 #define v4x8hf_UP E_V4x8HFmode
127 #define v4x8bf_UP E_V4x8BFmode
128 #define v4x4si_UP E_V4x4SImode
129 #define v4x4sf_UP E_V4x4SFmode
130 #define v4x2di_UP E_V4x2DImode
131 #define v4x2df_UP E_V4x2DFmode
132 #define UP(X) X##_UP
134 #define MODE_d_bf16 E_V4BFmode
135 #define MODE_d_f16 E_V4HFmode
136 #define MODE_d_f32 E_V2SFmode
137 #define MODE_d_f64 E_V1DFmode
138 #define MODE_d_mf8 E_V8QImode
139 #define MODE_d_s8 E_V8QImode
140 #define MODE_d_s16 E_V4HImode
141 #define MODE_d_s32 E_V2SImode
142 #define MODE_d_s64 E_V1DImode
143 #define MODE_d_u8 E_V8QImode
144 #define MODE_d_u16 E_V4HImode
145 #define MODE_d_u32 E_V2SImode
146 #define MODE_d_u64 E_V1DImode
147 #define MODE_d_p8 E_V8QImode
148 #define MODE_d_p16 E_V4HImode
149 #define MODE_d_p64 E_V1DImode
150 #define MODE_q_bf16 E_V8BFmode
151 #define MODE_q_f16 E_V8HFmode
152 #define MODE_q_f32 E_V4SFmode
153 #define MODE_q_f64 E_V2DFmode
154 #define MODE_q_mf8 E_V16QImode
155 #define MODE_q_s8 E_V16QImode
156 #define MODE_q_s16 E_V8HImode
157 #define MODE_q_s32 E_V4SImode
158 #define MODE_q_s64 E_V2DImode
159 #define MODE_q_u8 E_V16QImode
160 #define MODE_q_u16 E_V8HImode
161 #define MODE_q_u32 E_V4SImode
162 #define MODE_q_u64 E_V2DImode
163 #define MODE_q_p8 E_V16QImode
164 #define MODE_q_p16 E_V8HImode
165 #define MODE_q_p64 E_V2DImode
166 #define MODE_q_p128 E_TImode
168 #define QUAL_bf16 qualifier_none
169 #define QUAL_f16 qualifier_none
170 #define QUAL_f32 qualifier_none
171 #define QUAL_f64 qualifier_none
172 #define QUAL_s8 qualifier_none
173 #define QUAL_s16 qualifier_none
174 #define QUAL_s32 qualifier_none
175 #define QUAL_s64 qualifier_none
176 #define QUAL_u8 qualifier_unsigned
177 #define QUAL_u16 qualifier_unsigned
178 #define QUAL_u32 qualifier_unsigned
179 #define QUAL_u64 qualifier_unsigned
180 #define QUAL_p8 qualifier_poly
181 #define QUAL_p16 qualifier_poly
182 #define QUAL_p64 qualifier_poly
183 #define QUAL_p128 qualifier_poly
184 #define QUAL_mf8 qualifier_modal_float
186 #define LENGTH_d ""
187 #define LENGTH_q "q"
189 #define SIMD_INTR_MODE(suffix, length) MODE_##length##_##suffix
190 #define SIMD_INTR_QUAL(suffix) QUAL_##suffix
191 #define SIMD_INTR_LENGTH_CHAR(length) LENGTH_##length
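/* For example, SIMD_INTR_MODE (s8, d) expands to MODE_d_s8, i.e. E_V8QImode;
   SIMD_INTR_QUAL (u16) expands to QUAL_u16, i.e. qualifier_unsigned; and
   SIMD_INTR_LENGTH_CHAR (q) expands to LENGTH_q, i.e. "q".  */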
193 #define SIMD_MAX_BUILTIN_ARGS 5
195 /* Flags that describe what a function might do. */
196 const unsigned int FLAG_READ_FPCR = 1U << 0;
197 const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1;
198 const unsigned int FLAG_READ_MEMORY = 1U << 2;
199 const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3;
200 const unsigned int FLAG_WRITE_MEMORY = 1U << 4;
201 const unsigned int FLAG_USES_FPMR = 1U << 5;
203 /* Indicates that READ_FPCR and RAISE_FP_EXCEPTIONS should be set for
204 floating-point modes but not for integer modes. */
205 const unsigned int FLAG_AUTO_FP = 1U << 6;
207 const unsigned int FLAG_QUIET = 0;
208 const unsigned int FLAG_DEFAULT = FLAG_AUTO_FP;
209 const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
210 const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
211 | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
212 const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY;
213 const unsigned int FLAG_LOAD = FLAG_READ_MEMORY;
214 const unsigned int FLAG_FP8 = FLAG_FP | FLAG_USES_FPMR;
216 typedef struct
218 const char *name;
219 machine_mode mode;
220 const enum insn_code code;
221 unsigned int fcode;
222 enum aarch64_type_qualifiers *qualifiers;
223 unsigned int flags;
224 } aarch64_simd_builtin_datum;
226 static enum aarch64_type_qualifiers
227 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
228 = { qualifier_none, qualifier_none };
229 #define TYPES_UNOP (aarch64_types_unop_qualifiers)
230 static enum aarch64_type_qualifiers
231 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
232 = { qualifier_unsigned, qualifier_unsigned };
233 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
234 static enum aarch64_type_qualifiers
235 aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
236 = { qualifier_unsigned, qualifier_none };
237 #define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
238 static enum aarch64_type_qualifiers
239 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
240 = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
241 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
242 static enum aarch64_type_qualifiers
243 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
244 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
245 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
246 static enum aarch64_type_qualifiers
247 aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
248 = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
249 #define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
250 static enum aarch64_type_qualifiers
251 aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
252 = { qualifier_none, qualifier_none, qualifier_unsigned };
253 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
254 static enum aarch64_type_qualifiers
255 aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
256 = { qualifier_unsigned, qualifier_none, qualifier_none };
257 #define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
258 static enum aarch64_type_qualifiers
259 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
260 = { qualifier_poly, qualifier_poly, qualifier_poly };
261 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
262 static enum aarch64_type_qualifiers
263 aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
264 = { qualifier_poly, qualifier_poly, qualifier_unsigned };
265 #define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)
267 static enum aarch64_type_qualifiers
268 aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
269 = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
270 #define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
271 static enum aarch64_type_qualifiers
272 aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
273 = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
274 #define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
275 static enum aarch64_type_qualifiers
276 aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
277 = { qualifier_unsigned, qualifier_unsigned,
278 qualifier_unsigned, qualifier_unsigned };
279 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
280 static enum aarch64_type_qualifiers
281 aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
282 = { qualifier_unsigned, qualifier_unsigned,
283 qualifier_unsigned, qualifier_lane_index };
284 #define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
285 static enum aarch64_type_qualifiers
286 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
287 = { qualifier_unsigned, qualifier_unsigned,
288 qualifier_unsigned, qualifier_immediate };
289 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
290 static enum aarch64_type_qualifiers
291 aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
292 = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
293 #define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
294 static enum aarch64_type_qualifiers
295 aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
296 = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
297 #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
298 static enum aarch64_type_qualifiers
299 aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
300 = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
301 #define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
302 static enum aarch64_type_qualifiers
303 aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
304 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
305 #define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)
307 static enum aarch64_type_qualifiers
308 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
309 = { qualifier_none, qualifier_none, qualifier_none,
310 qualifier_none, qualifier_lane_pair_index };
311 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
312 static enum aarch64_type_qualifiers
313 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
314 = { qualifier_none, qualifier_none, qualifier_none,
315 qualifier_none, qualifier_lane_index };
316 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
317 static enum aarch64_type_qualifiers
318 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
319 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
320 qualifier_unsigned, qualifier_lane_index };
321 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
323 static enum aarch64_type_qualifiers
324 aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
325 = { qualifier_none, qualifier_none, qualifier_unsigned,
326 qualifier_none, qualifier_lane_quadtup_index };
327 #define TYPES_QUADOPSSUS_LANE_QUADTUP \
328 (aarch64_types_quadopssus_lane_quadtup_qualifiers)
329 static enum aarch64_type_qualifiers
330 aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
331 = { qualifier_none, qualifier_none, qualifier_none,
332 qualifier_unsigned, qualifier_lane_quadtup_index };
333 #define TYPES_QUADOPSSSU_LANE_QUADTUP \
334 (aarch64_types_quadopsssu_lane_quadtup_qualifiers)
336 static enum aarch64_type_qualifiers
337 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
338 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
339 qualifier_unsigned, qualifier_immediate };
340 #define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)
342 static enum aarch64_type_qualifiers
343 aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
344 = { qualifier_none, qualifier_none, qualifier_immediate };
345 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
346 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
347 static enum aarch64_type_qualifiers
348 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
349 = { qualifier_unsigned, qualifier_none, qualifier_immediate };
350 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
351 static enum aarch64_type_qualifiers
352 aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
353 = { qualifier_none, qualifier_unsigned, qualifier_immediate };
354 #define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
355 static enum aarch64_type_qualifiers
356 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
357 = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
358 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
359 #define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
360 static enum aarch64_type_qualifiers
361 aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
362 = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
363 #define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
365 static enum aarch64_type_qualifiers
366 aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
367 = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
368 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
369 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
370 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
371 #define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
373 static enum aarch64_type_qualifiers
374 aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
375 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
376 #define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
378 static enum aarch64_type_qualifiers
379 aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
380 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
381 qualifier_immediate };
382 #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
384 static enum aarch64_type_qualifiers
385 aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
386 = { qualifier_none, qualifier_const_pointer_map_mode };
387 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
388 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
389 static enum aarch64_type_qualifiers
390 aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
391 = { qualifier_unsigned, qualifier_const_pointer_map_mode };
392 #define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
393 #define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
394 static enum aarch64_type_qualifiers
395 aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
396 = { qualifier_poly, qualifier_const_pointer_map_mode };
397 #define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
398 #define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)
400 static enum aarch64_type_qualifiers
401 aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
402 = { qualifier_none, qualifier_const_pointer_map_mode,
403 qualifier_none, qualifier_struct_load_store_lane_index };
404 #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
405 static enum aarch64_type_qualifiers
406 aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
407 = { qualifier_unsigned, qualifier_const_pointer_map_mode,
408 qualifier_unsigned, qualifier_struct_load_store_lane_index };
409 #define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
410 static enum aarch64_type_qualifiers
411 aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
412 = { qualifier_poly, qualifier_const_pointer_map_mode,
413 qualifier_poly, qualifier_struct_load_store_lane_index };
414 #define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)
416 static enum aarch64_type_qualifiers
417 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
418 = { qualifier_poly, qualifier_unsigned,
419 qualifier_poly, qualifier_poly };
420 #define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
421 static enum aarch64_type_qualifiers
422 aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
423 = { qualifier_none, qualifier_unsigned,
424 qualifier_none, qualifier_none };
425 #define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
426 static enum aarch64_type_qualifiers
427 aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
428 = { qualifier_unsigned, qualifier_unsigned,
429 qualifier_unsigned, qualifier_unsigned };
430 #define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)
432 /* The first argument (return type) of a store should be void type,
433 which we represent with qualifier_void. The first operand will be
434 a DImode pointer to the location to store to, so we must use
435 qualifier_map_mode | qualifier_pointer to build a pointer to the
436 element type of the vector. */
437 static enum aarch64_type_qualifiers
438 aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
439 = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
440 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
441 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
442 static enum aarch64_type_qualifiers
443 aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
444 = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
445 #define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
446 #define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
447 static enum aarch64_type_qualifiers
448 aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
449 = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
450 #define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
451 #define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)
453 static enum aarch64_type_qualifiers
454 aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
455 = { qualifier_void, qualifier_pointer_map_mode,
456 qualifier_none, qualifier_struct_load_store_lane_index };
457 #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
458 static enum aarch64_type_qualifiers
459 aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
460 = { qualifier_void, qualifier_pointer_map_mode,
461 qualifier_unsigned, qualifier_struct_load_store_lane_index };
462 #define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
463 static enum aarch64_type_qualifiers
464 aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
465 = { qualifier_void, qualifier_pointer_map_mode,
466 qualifier_poly, qualifier_struct_load_store_lane_index };
467 #define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)
469 constexpr insn_code CODE_FOR_aarch64_sdot_prodv8qi
470 = CODE_FOR_sdot_prodv2siv8qi;
471 constexpr insn_code CODE_FOR_aarch64_udot_prodv8qi
472 = CODE_FOR_udot_prodv2siv8qi;
473 constexpr insn_code CODE_FOR_aarch64_usdot_prodv8qi
474 = CODE_FOR_usdot_prodv2siv8qi;
475 constexpr insn_code CODE_FOR_aarch64_sdot_prodv16qi
476 = CODE_FOR_sdot_prodv4siv16qi;
477 constexpr insn_code CODE_FOR_aarch64_udot_prodv16qi
478 = CODE_FOR_udot_prodv4siv16qi;
479 constexpr insn_code CODE_FOR_aarch64_usdot_prodv16qi
480 = CODE_FOR_usdot_prodv4siv16qi;
482 #define CF0(N, X) CODE_FOR_aarch64_##N##X
483 #define CF1(N, X) CODE_FOR_##N##X##1
484 #define CF2(N, X) CODE_FOR_##N##X##2
485 #define CF3(N, X) CODE_FOR_##N##X##3
486 #define CF4(N, X) CODE_FOR_##N##X##4
487 #define CF10(N, X) CODE_FOR_##N##X
489 /* Define cascading VAR<N> macros that are used from
490 aarch64-builtin-iterators.h to iterate over modes. These definitions
491 will end up generating a number of VAR1 expansions and code later on in the
492 file should redefine VAR1 to whatever it needs to process on a per-mode
493 basis. */
494 #define VAR2(T, N, MAP, FLAG, A, B) \
495 VAR1 (T, N, MAP, FLAG, A) \
496 VAR1 (T, N, MAP, FLAG, B)
497 #define VAR3(T, N, MAP, FLAG, A, B, C) \
498 VAR2 (T, N, MAP, FLAG, A, B) \
499 VAR1 (T, N, MAP, FLAG, C)
500 #define VAR4(T, N, MAP, FLAG, A, B, C, D) \
501 VAR3 (T, N, MAP, FLAG, A, B, C) \
502 VAR1 (T, N, MAP, FLAG, D)
503 #define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
504 VAR4 (T, N, MAP, FLAG, A, B, C, D) \
505 VAR1 (T, N, MAP, FLAG, E)
506 #define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
507 VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
508 VAR1 (T, N, MAP, FLAG, F)
509 #define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
510 VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
511 VAR1 (T, N, MAP, FLAG, G)
512 #define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
513 VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
514 VAR1 (T, N, MAP, FLAG, H)
515 #define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
516 VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
517 VAR1 (T, N, MAP, FLAG, I)
518 #define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
519 VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
520 VAR1 (T, N, MAP, FLAG, J)
521 #define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
522 VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
523 VAR1 (T, N, MAP, FLAG, K)
524 #define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
525 VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
526 VAR1 (T, N, MAP, FLAG, L)
527 #define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
528 VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
529 VAR1 (T, N, MAP, FLAG, M)
530 #define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
531 VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
532 VAR1 (T, X, MAP, FLAG, N)
533 #define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
534 VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
535 VAR1 (T, X, MAP, FLAG, O)
536 #define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
537 VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
538 VAR1 (T, X, MAP, FLAG, P)
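/* For illustration: an invocation such as VAR3 (BINOP, foo, 0, DEFAULT,
   v8qi, v4hi, v2si) expands through VAR2 into the three per-mode
   invocations VAR1 (BINOP, foo, 0, DEFAULT, v8qi),
   VAR1 (BINOP, foo, 0, DEFAULT, v4hi) and
   VAR1 (BINOP, foo, 0, DEFAULT, v2si); "foo" is only a placeholder name.  */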
540 #include "aarch64-builtin-iterators.h"
542 /* The builtins below should be expanded through the standard optabs
543 CODE_FOR_[u]avg<mode>3_[floor,ceil]. However the mapping scheme in
544 aarch64-simd-builtins.def does not easily allow us to have a pre-mode
545 ("uavg") and post-mode string ("_ceil") in the CODE_FOR_* construction.
546 So the builtins use a name that is natural for AArch64 instructions,
547 e.g. "aarch64_srhadd<mode>", and we re-map these to the optab-related
548 CODE_FOR_ names here. */
549 #undef VAR1
550 #define VAR1(F,T1,T2,I,M) \
551 constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T1##M##3##T2;
553 BUILTIN_VDQ_BHSI (srhadd, avg, _ceil, 0)
554 BUILTIN_VDQ_BHSI (urhadd, uavg, _ceil, 0)
555 BUILTIN_VDQ_BHSI (shadd, avg, _floor, 0)
556 BUILTIN_VDQ_BHSI (uhadd, uavg, _floor, 0)
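/* For example, assuming v8qi is one of the modes supplied by the
   BUILTIN_VDQ_BHSI iterator, the srhadd line above generates
     constexpr insn_code CODE_FOR_aarch64_srhaddv8qi = CODE_FOR_avgv8qi3_ceil;
   remapping that builtin onto the standard avg<mode>3_ceil pattern.  */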
558 /* The builtins below should be expanded through the standard optabs
559 CODE_FOR_extend<mode><Vwide>2. */
560 #undef VAR1
561 #define VAR1(F,T,N,M) \
562 constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T##N##M##2;
564 VAR1 (float_extend_lo_, extend, v2sf, v2df)
565 VAR1 (float_extend_lo_, extend, v4hf, v4sf)
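/* For instance, VAR1 (float_extend_lo_, extend, v2sf, v2df) expands to
     constexpr insn_code CODE_FOR_aarch64_float_extend_lo_v2df
       = CODE_FOR_extendv2sfv2df2;  */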
567 /* __builtin_aarch64_float_truncate_lo_<mode> should be expanded through the
568 standard optabs CODE_FOR_trunc<Vwide><mode>2. */
569 constexpr insn_code CODE_FOR_aarch64_float_truncate_lo_v4hf
570 = CODE_FOR_truncv4sfv4hf2;
571 constexpr insn_code CODE_FOR_aarch64_float_truncate_lo_v2sf
572 = CODE_FOR_truncv2dfv2sf2;
574 #undef VAR1
575 #define VAR1(T, N, MAP, FLAG, A) \
576 {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
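/* With this definition, a hypothetical .def entry
   VAR1 (BINOP, foo, 0, DEFAULT, v8qi) ("foo" is a placeholder, not a real
   builtin) would produce the initializer
     {"foov8qi", E_V8QImode, CODE_FOR_aarch64_foov8qi, 0, TYPES_BINOP,
      FLAG_DEFAULT},
   in the table below.  */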
578 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
579 #include "aarch64-simd-builtins.def"
582 /* There are only 8 CRC32 builtins. Probably not worth their own .def file. */
583 #define AARCH64_CRC32_BUILTINS \
584 CRC32_BUILTIN (crc32b, QI) \
585 CRC32_BUILTIN (crc32h, HI) \
586 CRC32_BUILTIN (crc32w, SI) \
587 CRC32_BUILTIN (crc32x, DI) \
588 CRC32_BUILTIN (crc32cb, QI) \
589 CRC32_BUILTIN (crc32ch, HI) \
590 CRC32_BUILTIN (crc32cw, SI) \
591 CRC32_BUILTIN (crc32cx, DI)
593 /* The next 8 FCMLA intrinsics require some special handling compared to
594 the normal SIMD intrinsics. */
595 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
596 FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
597 FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
598 FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
599 FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
600 FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
601 FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
602 FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
603 FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
606 /* vreinterpret intrinsics are defined for any pair of element types.
607 { _bf16 } { _bf16 }
608 { _f16 _f32 _f64 } { _f16 _f32 _f64 }
609 { _mf8 } { _mf8 }
610 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
611 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
612 { _p8 _p16 _p64 } { _p8 _p16 _p64 }. */
613 #define VREINTERPRET_BUILTIN2(A, B) \
614 VREINTERPRET_BUILTIN (A, B, d)
616 #define VREINTERPRET_BUILTINS1(A) \
617 VREINTERPRET_BUILTIN2 (A, bf16) \
618 VREINTERPRET_BUILTIN2 (A, f16) \
619 VREINTERPRET_BUILTIN2 (A, f32) \
620 VREINTERPRET_BUILTIN2 (A, f64) \
621 VREINTERPRET_BUILTIN2 (A, mf8) \
622 VREINTERPRET_BUILTIN2 (A, s8) \
623 VREINTERPRET_BUILTIN2 (A, s16) \
624 VREINTERPRET_BUILTIN2 (A, s32) \
625 VREINTERPRET_BUILTIN2 (A, s64) \
626 VREINTERPRET_BUILTIN2 (A, u8) \
627 VREINTERPRET_BUILTIN2 (A, u16) \
628 VREINTERPRET_BUILTIN2 (A, u32) \
629 VREINTERPRET_BUILTIN2 (A, u64) \
630 VREINTERPRET_BUILTIN2 (A, p8) \
631 VREINTERPRET_BUILTIN2 (A, p16) \
632 VREINTERPRET_BUILTIN2 (A, p64)
634 #define VREINTERPRET_BUILTINS \
635 VREINTERPRET_BUILTINS1 (bf16) \
636 VREINTERPRET_BUILTINS1 (f16) \
637 VREINTERPRET_BUILTINS1 (f32) \
638 VREINTERPRET_BUILTINS1 (f64) \
639 VREINTERPRET_BUILTINS1 (mf8) \
640 VREINTERPRET_BUILTINS1 (s8) \
641 VREINTERPRET_BUILTINS1 (s16) \
642 VREINTERPRET_BUILTINS1 (s32) \
643 VREINTERPRET_BUILTINS1 (s64) \
644 VREINTERPRET_BUILTINS1 (u8) \
645 VREINTERPRET_BUILTINS1 (u16) \
646 VREINTERPRET_BUILTINS1 (u32) \
647 VREINTERPRET_BUILTINS1 (u64) \
648 VREINTERPRET_BUILTINS1 (p8) \
649 VREINTERPRET_BUILTINS1 (p16) \
650 VREINTERPRET_BUILTINS1 (p64)
652 /* vreinterpretq intrinsics are additionally defined for p128.
653 { _bf16 } { _bf16 }
654 { _f16 _f32 _f64 } { _f16 _f32 _f64 }
655 { _mf8 } { _mf8 }
656 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
657 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
658 { _p8 _p16 _p64 _p128 } { _p8 _p16 _p64 _p128 }. */
659 #define VREINTERPRETQ_BUILTIN2(A, B) \
660 VREINTERPRET_BUILTIN (A, B, q)
662 #define VREINTERPRETQ_BUILTINS1(A) \
663 VREINTERPRETQ_BUILTIN2 (A, bf16) \
664 VREINTERPRETQ_BUILTIN2 (A, f16) \
665 VREINTERPRETQ_BUILTIN2 (A, f32) \
666 VREINTERPRETQ_BUILTIN2 (A, f64) \
667 VREINTERPRETQ_BUILTIN2 (A, mf8) \
668 VREINTERPRETQ_BUILTIN2 (A, s8) \
669 VREINTERPRETQ_BUILTIN2 (A, s16) \
670 VREINTERPRETQ_BUILTIN2 (A, s32) \
671 VREINTERPRETQ_BUILTIN2 (A, s64) \
672 VREINTERPRETQ_BUILTIN2 (A, u8) \
673 VREINTERPRETQ_BUILTIN2 (A, u16) \
674 VREINTERPRETQ_BUILTIN2 (A, u32) \
675 VREINTERPRETQ_BUILTIN2 (A, u64) \
676 VREINTERPRETQ_BUILTIN2 (A, p8) \
677 VREINTERPRETQ_BUILTIN2 (A, p16) \
678 VREINTERPRETQ_BUILTIN2 (A, p64) \
679 VREINTERPRETQ_BUILTIN2 (A, p128)
681 #define VREINTERPRETQ_BUILTINS \
682 VREINTERPRETQ_BUILTINS1 (bf16) \
683 VREINTERPRETQ_BUILTINS1 (f16) \
684 VREINTERPRETQ_BUILTINS1 (f32) \
685 VREINTERPRETQ_BUILTINS1 (f64) \
686 VREINTERPRETQ_BUILTINS1 (mf8) \
687 VREINTERPRETQ_BUILTINS1 (s8) \
688 VREINTERPRETQ_BUILTINS1 (s16) \
689 VREINTERPRETQ_BUILTINS1 (s32) \
690 VREINTERPRETQ_BUILTINS1 (s64) \
691 VREINTERPRETQ_BUILTINS1 (u8) \
692 VREINTERPRETQ_BUILTINS1 (u16) \
693 VREINTERPRETQ_BUILTINS1 (u32) \
694 VREINTERPRETQ_BUILTINS1 (u64) \
695 VREINTERPRETQ_BUILTINS1 (p8) \
696 VREINTERPRETQ_BUILTINS1 (p16) \
697 VREINTERPRETQ_BUILTINS1 (p64) \
698 VREINTERPRETQ_BUILTINS1 (p128)
700 #define AARCH64_SIMD_VREINTERPRET_BUILTINS \
701 VREINTERPRET_BUILTINS \
702 VREINTERPRETQ_BUILTINS
704 #define AARCH64_SIMD_VGET_LOW_BUILTINS \
705 VGET_LOW_BUILTIN(mf8) \
706 VGET_LOW_BUILTIN(f16) \
707 VGET_LOW_BUILTIN(f32) \
708 VGET_LOW_BUILTIN(f64) \
709 VGET_LOW_BUILTIN(p8) \
710 VGET_LOW_BUILTIN(p16) \
711 VGET_LOW_BUILTIN(p64) \
712 VGET_LOW_BUILTIN(s8) \
713 VGET_LOW_BUILTIN(s16) \
714 VGET_LOW_BUILTIN(s32) \
715 VGET_LOW_BUILTIN(s64) \
716 VGET_LOW_BUILTIN(u8) \
717 VGET_LOW_BUILTIN(u16) \
718 VGET_LOW_BUILTIN(u32) \
719 VGET_LOW_BUILTIN(u64) \
720 VGET_LOW_BUILTIN(bf16)
722 #define AARCH64_SIMD_VGET_HIGH_BUILTINS \
723 VGET_HIGH_BUILTIN(mf8) \
724 VGET_HIGH_BUILTIN(f16) \
725 VGET_HIGH_BUILTIN(f32) \
726 VGET_HIGH_BUILTIN(f64) \
727 VGET_HIGH_BUILTIN(p8) \
728 VGET_HIGH_BUILTIN(p16) \
729 VGET_HIGH_BUILTIN(p64) \
730 VGET_HIGH_BUILTIN(s8) \
731 VGET_HIGH_BUILTIN(s16) \
732 VGET_HIGH_BUILTIN(s32) \
733 VGET_HIGH_BUILTIN(s64) \
734 VGET_HIGH_BUILTIN(u8) \
735 VGET_HIGH_BUILTIN(u16) \
736 VGET_HIGH_BUILTIN(u32) \
737 VGET_HIGH_BUILTIN(u64) \
738 VGET_HIGH_BUILTIN(bf16)
740 typedef struct
742 const char *name;
743 machine_mode mode;
744 const enum insn_code icode;
745 unsigned int fcode;
746 } aarch64_crc_builtin_datum;
748 /* Hold information about how to expand the FCMLA_LANEQ builtins. */
749 typedef struct
751 const char *name;
752 machine_mode mode;
753 const enum insn_code icode;
754 unsigned int fcode;
755 bool lane;
756 } aarch64_fcmla_laneq_builtin_datum;
758 /* Hold information about how to declare SIMD intrinsics. */
759 typedef struct
761 const char *name;
762 unsigned int fcode;
763 unsigned int op_count;
764 machine_mode op_modes[SIMD_MAX_BUILTIN_ARGS];
765 enum aarch64_type_qualifiers qualifiers[SIMD_MAX_BUILTIN_ARGS];
766 unsigned int flags;
767 bool skip;
768 } aarch64_simd_intrinsic_datum;
770 #define CRC32_BUILTIN(N, M) \
771 AARCH64_BUILTIN_##N,
773 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
774 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
776 #define VREINTERPRET_BUILTIN(A, B, L) \
777 AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B,
779 #define VGET_LOW_BUILTIN(A) \
780 AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
782 #define VGET_HIGH_BUILTIN(A) \
783 AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
785 #undef VAR1
786 #define VAR1(T, N, MAP, FLAG, A) \
787 AARCH64_SIMD_BUILTIN_##T##_##N##A,
789 #undef ENTRY
790 #define ENTRY(N, S, T0, T1, T2, T3, U, F) \
791 AARCH64_##N,
793 enum aarch64_builtins
795 AARCH64_BUILTIN_MIN,
797 AARCH64_BUILTIN_GET_FPCR,
798 AARCH64_BUILTIN_SET_FPCR,
799 AARCH64_BUILTIN_GET_FPSR,
800 AARCH64_BUILTIN_SET_FPSR,
802 AARCH64_BUILTIN_GET_FPCR64,
803 AARCH64_BUILTIN_SET_FPCR64,
804 AARCH64_BUILTIN_GET_FPSR64,
805 AARCH64_BUILTIN_SET_FPSR64,
807 AARCH64_BUILTIN_RSQRT_DF,
808 AARCH64_BUILTIN_RSQRT_SF,
809 AARCH64_BUILTIN_RSQRT_V2DF,
810 AARCH64_BUILTIN_RSQRT_V2SF,
811 AARCH64_BUILTIN_RSQRT_V4SF,
812 AARCH64_SIMD_BUILTIN_BASE,
813 AARCH64_SIMD_BUILTIN_LANE_CHECK,
814 #include "aarch64-simd-builtins.def"
815 /* The first enum element which is based on an insn_data pattern. */
816 AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
817 AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
818 + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
819 AARCH64_CRC32_BUILTIN_BASE,
820 AARCH64_CRC32_BUILTINS
821 AARCH64_CRC32_BUILTIN_MAX,
822 /* SIMD intrinsic builtins. */
823 AARCH64_SIMD_VREINTERPRET_BUILTINS
824 AARCH64_SIMD_VGET_LOW_BUILTINS
825 AARCH64_SIMD_VGET_HIGH_BUILTINS
826 /* ARMv8.3-A Pointer Authentication Builtins. */
827 AARCH64_PAUTH_BUILTIN_AUTIA1716,
828 AARCH64_PAUTH_BUILTIN_PACIA1716,
829 AARCH64_PAUTH_BUILTIN_AUTIB1716,
830 AARCH64_PAUTH_BUILTIN_PACIB1716,
831 AARCH64_PAUTH_BUILTIN_XPACLRI,
832 /* Special-cased Armv8.3-A complex FMA by-lane quad builtins. */
833 AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
834 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
835 /* Builtin for the Armv8.3-A JavaScript conversion instruction. */
836 AARCH64_JSCVT,
837 /* TME builtins. */
838 AARCH64_TME_BUILTIN_TSTART,
839 AARCH64_TME_BUILTIN_TCOMMIT,
840 AARCH64_TME_BUILTIN_TTEST,
841 AARCH64_TME_BUILTIN_TCANCEL,
842 /* Armv8.5-a RNG instruction builtins. */
843 AARCH64_BUILTIN_RNG_RNDR,
844 AARCH64_BUILTIN_RNG_RNDRRS,
845 /* MEMTAG builtins. */
846 AARCH64_MEMTAG_BUILTIN_START,
847 AARCH64_MEMTAG_BUILTIN_IRG,
848 AARCH64_MEMTAG_BUILTIN_GMI,
849 AARCH64_MEMTAG_BUILTIN_SUBP,
850 AARCH64_MEMTAG_BUILTIN_INC_TAG,
851 AARCH64_MEMTAG_BUILTIN_SET_TAG,
852 AARCH64_MEMTAG_BUILTIN_GET_TAG,
853 AARCH64_MEMTAG_BUILTIN_END,
854 /* LS64 builtins. */
855 AARCH64_LS64_BUILTIN_LD64B,
856 AARCH64_LS64_BUILTIN_ST64B,
857 AARCH64_LS64_BUILTIN_ST64BV,
858 AARCH64_LS64_BUILTIN_ST64BV0,
859 AARCH64_REV16,
860 AARCH64_REV16L,
861 AARCH64_REV16LL,
862 AARCH64_RBIT,
863 AARCH64_RBITL,
864 AARCH64_RBITLL,
865 /* Pragma builtins. */
866 AARCH64_PRAGMA_BUILTIN_START,
867 #include "aarch64-simd-pragma-builtins.def"
868 AARCH64_PRAGMA_BUILTIN_END,
869 /* System register builtins. */
870 AARCH64_RSR,
871 AARCH64_RSRP,
872 AARCH64_RSR64,
873 AARCH64_RSRF,
874 AARCH64_RSRF64,
875 AARCH64_RSR128,
876 AARCH64_WSR,
877 AARCH64_WSRP,
878 AARCH64_WSR64,
879 AARCH64_WSRF,
880 AARCH64_WSRF64,
881 AARCH64_WSR128,
882 AARCH64_PLD,
883 AARCH64_PLDX,
884 AARCH64_PLI,
885 AARCH64_PLIX,
886 /* Armv8.9-A / Armv9.4-A builtins. */
887 AARCH64_BUILTIN_CHKFEAT,
888 AARCH64_BUILTIN_GCSPR,
889 AARCH64_BUILTIN_GCSPOPM,
890 AARCH64_BUILTIN_GCSSS,
891 AARCH64_BUILTIN_MAX
894 #undef CRC32_BUILTIN
895 #define CRC32_BUILTIN(N, M) \
896 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
898 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
899 AARCH64_CRC32_BUILTINS
903 #undef FCMLA_LANEQ_BUILTIN
904 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
905 {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
906 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
908 /* This structure describes how to map the builtin to the instruction to
909 generate in the back end and how to invoke that instruction. */
910 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
911 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
914 #undef VREINTERPRET_BUILTIN
915 #define VREINTERPRET_BUILTIN(A, B, L) \
916 {"vreinterpret" SIMD_INTR_LENGTH_CHAR(L) "_" #A "_" #B, \
917 AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B, \
918 2, \
919 { SIMD_INTR_MODE(A, L), SIMD_INTR_MODE(B, L) }, \
920 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(B) }, \
921 FLAG_DEFAULT, \
922 SIMD_INTR_MODE(A, L) == SIMD_INTR_MODE(B, L) \
923 && SIMD_INTR_QUAL(A) == SIMD_INTR_QUAL(B) \
926 #undef VGET_LOW_BUILTIN
927 #define VGET_LOW_BUILTIN(A) \
928 {"vget_low_" #A, \
929 AARCH64_SIMD_BUILTIN_VGET_LOW_##A, \
930 2, \
931 { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
932 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
933 FLAG_DEFAULT, \
934 false \
937 #undef VGET_HIGH_BUILTIN
938 #define VGET_HIGH_BUILTIN(A) \
939 {"vget_high_" #A, \
940 AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
941 2, \
942 { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
943 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
944 FLAG_DEFAULT, \
945 false \
948 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
949 AARCH64_SIMD_VREINTERPRET_BUILTINS
950 AARCH64_SIMD_VGET_LOW_BUILTINS
951 AARCH64_SIMD_VGET_HIGH_BUILTINS
955 #undef CRC32_BUILTIN
957 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
959 #define NUM_DREG_TYPES 6
960 #define NUM_QREG_TYPES 6
962 /* Internal scalar builtin types. These types are used to support
963 Neon intrinsic builtins. They are _not_ user-visible types. Therefore
964 the mangling for these types is implementation-defined. */
965 const char *aarch64_scalar_builtin_types[] = {
966 "__builtin_aarch64_simd_qi",
967 "__builtin_aarch64_simd_hi",
968 "__builtin_aarch64_simd_si",
969 "__builtin_aarch64_simd_hf",
970 "__builtin_aarch64_simd_sf",
971 "__builtin_aarch64_simd_di",
972 "__builtin_aarch64_simd_df",
973 "__builtin_aarch64_simd_poly8",
974 "__builtin_aarch64_simd_poly16",
975 "__builtin_aarch64_simd_poly64",
976 "__builtin_aarch64_simd_poly128",
977 "__builtin_aarch64_simd_ti",
978 "__builtin_aarch64_simd_uqi",
979 "__builtin_aarch64_simd_uhi",
980 "__builtin_aarch64_simd_usi",
981 "__builtin_aarch64_simd_udi",
982 "__builtin_aarch64_simd_ei",
983 "__builtin_aarch64_simd_oi",
984 "__builtin_aarch64_simd_ci",
985 "__builtin_aarch64_simd_xi",
986 "__builtin_aarch64_simd_bf",
987 NULL
990 extern const aarch64_simd_type_info aarch64_simd_types[];
991 extern GTY(()) aarch64_simd_type_info_trees aarch64_simd_types_trees[];
993 #undef ENTRY
994 #define ENTRY(E, M, Q, G) \
995 {E, "__" #E, #G "__" #E, E_##M##mode, qualifier_##Q},
996 const struct aarch64_simd_type_info aarch64_simd_types[] = {
997 #include "aarch64-simd-builtin-types.def"
999 #undef ENTRY
1001 struct aarch64_simd_type_info_trees
1002 aarch64_simd_types_trees[ARRAY_SIZE (aarch64_simd_types)];
1004 static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
1005 static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];
1007 static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
1008 static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
1009 static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
1011 /* The user-visible __mfp8 type, and a pointer to that type. Used
1012 across the back-end. */
1013 tree aarch64_mfp8_type_node = NULL_TREE;
1014 tree aarch64_mfp8_ptr_type_node = NULL_TREE;
1016 /* The user-visible __fp16 type, and a pointer to that type. Used
1017 across the back-end. */
1018 tree aarch64_fp16_type_node = NULL_TREE;
1019 tree aarch64_fp16_ptr_type_node = NULL_TREE;
1021 /* Back-end node type for brain float (bfloat) types. */
1022 tree aarch64_bf16_ptr_type_node = NULL_TREE;
1024 /* Wrapper around add_builtin_function. NAME is the name of the built-in
1025 function, TYPE is the function type, CODE is the function subcode
1026 (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
1027 attributes. */
1028 static tree
1029 aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
1030 tree attrs = NULL_TREE)
1032 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
1033 return add_builtin_function (name, type, code, BUILT_IN_MD,
1034 NULL, attrs);
1037 static tree
1038 aarch64_general_simulate_builtin (const char *name, tree fntype,
1039 unsigned int code,
1040 tree attrs = NULL_TREE)
1042 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
1043 return simulate_builtin_function_decl (input_location, name, fntype,
1044 code, NULL, attrs);
1047 static const char *
1048 aarch64_mangle_builtin_scalar_type (const_tree type)
1050 int i = 0;
1052 while (aarch64_scalar_builtin_types[i] != NULL)
1054 const char *name = aarch64_scalar_builtin_types[i];
1056 if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
1057 && DECL_NAME (TYPE_NAME (type))
1058 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
1059 return aarch64_scalar_builtin_types[i];
1060 i++;
1062 return NULL;
1065 static const char *
1066 aarch64_mangle_builtin_vector_type (const_tree type)
1068 tree attrs = TYPE_ATTRIBUTES (type);
1069 if (tree attr = lookup_attribute ("Advanced SIMD type", attrs))
1071 tree mangled_name = TREE_VALUE (TREE_VALUE (attr));
1072 return IDENTIFIER_POINTER (mangled_name);
1075 return NULL;
1078 const char *
1079 aarch64_general_mangle_builtin_type (const_tree type)
1081 const char *mangle;
1082 /* Walk through all the AArch64 builtin type tables to find a match for
1083 the incoming type. */
1084 if ((mangle = aarch64_mangle_builtin_vector_type (type))
1085 || (mangle = aarch64_mangle_builtin_scalar_type (type)))
1086 return mangle;
1088 return NULL;
1091 /* Helper function for aarch64_simd_builtin_type. */
1092 static tree
1093 aarch64_int_or_fp_type (machine_mode mode,
1094 enum aarch64_type_qualifiers qualifiers)
1096 #define QUAL_TYPE(M) ((qualifiers & qualifier_unsigned) \
1097 ? unsigned_int##M##_type_node : int##M##_type_node);
1098 switch (mode)
1100 case E_QImode:
1101 if (qualifiers & qualifier_modal_float)
1102 return aarch64_mfp8_type_node;
1103 return QUAL_TYPE (QI);
1104 case E_HImode:
1105 return QUAL_TYPE (HI);
1106 case E_SImode:
1107 return QUAL_TYPE (SI);
1108 case E_DImode:
1109 return QUAL_TYPE (DI);
1110 case E_TImode:
1111 return QUAL_TYPE (TI);
1112 case E_OImode:
1113 return aarch64_simd_intOI_type_node;
1114 case E_CImode:
1115 return aarch64_simd_intCI_type_node;
1116 case E_XImode:
1117 return aarch64_simd_intXI_type_node;
1118 case E_HFmode:
1119 return aarch64_fp16_type_node;
1120 case E_SFmode:
1121 return float_type_node;
1122 case E_DFmode:
1123 return double_type_node;
1124 case E_BFmode:
1125 return bfloat16_type_node;
1126 default:
1127 gcc_unreachable ();
1129 #undef QUAL_TYPE
1132 /* Helper function for aarch64_simd_builtin_type. */
1133 static tree
1134 aarch64_lookup_simd_type_in_table (machine_mode mode,
1135 enum aarch64_type_qualifiers qualifiers)
1137 int i;
1138 int nelts = ARRAY_SIZE (aarch64_simd_types);
1139 int q = qualifiers
1140 & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);
1142 for (i = 0; i < nelts; i++)
1144 if (aarch64_simd_types[i].mode == mode
1145 && aarch64_simd_types[i].q == q)
1146 return aarch64_simd_types_trees[i].itype;
1147 if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
1148 for (int j = 0; j < 3; j++)
1149 if (aarch64_simd_tuple_modes[i][j] == mode
1150 && aarch64_simd_types[i].q == q)
1151 return aarch64_simd_tuple_types[i][j];
1154 return NULL_TREE;
1157 /* Return a type for an operand with specified mode and qualifiers. */
1158 static tree
1159 aarch64_simd_builtin_type (machine_mode mode,
1160 enum aarch64_type_qualifiers qualifiers)
1162 tree type = NULL_TREE;
1164 /* For pointers, we want a pointer to the basic type of the vector. */
1165 if ((qualifiers & qualifier_pointer) && VECTOR_MODE_P (mode))
1166 mode = GET_MODE_INNER (mode);
1168 /* Non-poly scalar modes map to standard types not in the table. */
1169 if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
1170 type = aarch64_lookup_simd_type_in_table (mode, qualifiers);
1171 else
1172 type = aarch64_int_or_fp_type (mode, qualifiers);
1174 gcc_assert (type != NULL_TREE);
1176 /* Add qualifiers. */
1177 if (qualifiers & qualifier_const)
1178 type = build_qualified_type (type, TYPE_QUAL_CONST);
1179 if (qualifiers & qualifier_pointer)
1180 type = build_pointer_type (type);
1182 return type;
1185 static void
1186 aarch64_init_simd_builtin_types (void)
1188 int i;
1189 int nelts = ARRAY_SIZE (aarch64_simd_types);
1190 tree tdecl;
1192 /* Init all the element types built by the front-end. */
1193 aarch64_simd_types_trees[Int8x8_t].eltype = intQI_type_node;
1194 aarch64_simd_types_trees[Int8x16_t].eltype = intQI_type_node;
1195 aarch64_simd_types_trees[Int16x4_t].eltype = intHI_type_node;
1196 aarch64_simd_types_trees[Int16x8_t].eltype = intHI_type_node;
1197 aarch64_simd_types_trees[Int32x2_t].eltype = intSI_type_node;
1198 aarch64_simd_types_trees[Int32x4_t].eltype = intSI_type_node;
1199 aarch64_simd_types_trees[Int64x1_t].eltype = intDI_type_node;
1200 aarch64_simd_types_trees[Int64x2_t].eltype = intDI_type_node;
1201 aarch64_simd_types_trees[Uint8x8_t].eltype = unsigned_intQI_type_node;
1202 aarch64_simd_types_trees[Uint8x16_t].eltype = unsigned_intQI_type_node;
1203 aarch64_simd_types_trees[Uint16x4_t].eltype = unsigned_intHI_type_node;
1204 aarch64_simd_types_trees[Uint16x8_t].eltype = unsigned_intHI_type_node;
1205 aarch64_simd_types_trees[Uint32x2_t].eltype = unsigned_intSI_type_node;
1206 aarch64_simd_types_trees[Uint32x4_t].eltype = unsigned_intSI_type_node;
1207 aarch64_simd_types_trees[Uint64x1_t].eltype = unsigned_intDI_type_node;
1208 aarch64_simd_types_trees[Uint64x2_t].eltype = unsigned_intDI_type_node;
1210 /* Poly types are a world of their own. */
1211 aarch64_simd_types_trees[Poly8_t].eltype
1212 = aarch64_simd_types_trees[Poly8_t].itype
1213 = build_distinct_type_copy (unsigned_intQI_type_node);
1214 /* Prevent front-ends from transforming Poly8_t arrays into string
1215 literals. */
1216 TYPE_STRING_FLAG (aarch64_simd_types_trees[Poly8_t].eltype) = false;
1218 aarch64_simd_types_trees[Poly16_t].eltype
1219 = aarch64_simd_types_trees[Poly16_t].itype
1220 = build_distinct_type_copy (unsigned_intHI_type_node);
1221 aarch64_simd_types_trees[Poly64_t].eltype
1222 = aarch64_simd_types_trees[Poly64_t].itype
1223 = build_distinct_type_copy (unsigned_intDI_type_node);
1224 aarch64_simd_types_trees[Poly128_t].eltype
1225 = aarch64_simd_types_trees[Poly128_t].itype
1226 = build_distinct_type_copy (unsigned_intTI_type_node);
1227 /* Init poly vector element types with scalar poly types. */
1228 aarch64_simd_types_trees[Poly8x8_t].eltype
1229 = aarch64_simd_types_trees[Poly8_t].itype;
1230 aarch64_simd_types_trees[Poly8x16_t].eltype
1231 = aarch64_simd_types_trees[Poly8_t].itype;
1232 aarch64_simd_types_trees[Poly16x4_t].eltype
1233 = aarch64_simd_types_trees[Poly16_t].itype;
1234 aarch64_simd_types_trees[Poly16x8_t].eltype
1235 = aarch64_simd_types_trees[Poly16_t].itype;
1236 aarch64_simd_types_trees[Poly64x1_t].eltype
1237 = aarch64_simd_types_trees[Poly64_t].itype;
1238 aarch64_simd_types_trees[Poly64x2_t].eltype
1239 = aarch64_simd_types_trees[Poly64_t].itype;
1241 /* Continue with standard types. */
1242 aarch64_simd_types_trees[Float16x4_t].eltype = aarch64_fp16_type_node;
1243 aarch64_simd_types_trees[Float16x8_t].eltype = aarch64_fp16_type_node;
1244 aarch64_simd_types_trees[Float32x2_t].eltype = float_type_node;
1245 aarch64_simd_types_trees[Float32x4_t].eltype = float_type_node;
1246 aarch64_simd_types_trees[Float64x1_t].eltype = double_type_node;
1247 aarch64_simd_types_trees[Float64x2_t].eltype = double_type_node;
1249 /* Init Bfloat vector types with underlying __bf16 type. */
1250 aarch64_simd_types_trees[Bfloat16x4_t].eltype = bfloat16_type_node;
1251 aarch64_simd_types_trees[Bfloat16x8_t].eltype = bfloat16_type_node;
1253 /* Init FP8 element types. */
1254 aarch64_simd_types_trees[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
1255 aarch64_simd_types_trees[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;
1257 for (i = 0; i < nelts; i++)
1259 tree eltype = aarch64_simd_types_trees[i].eltype;
1260 machine_mode mode = aarch64_simd_types[i].mode;
1262 if (aarch64_simd_types_trees[i].itype == NULL)
1264 tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
1265 type = build_distinct_type_copy (type);
1266 SET_TYPE_STRUCTURAL_EQUALITY (type);
1268 tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
1269 tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
1270 TYPE_ATTRIBUTES (type)
1271 = tree_cons (get_identifier ("Advanced SIMD type"), value,
1272 TYPE_ATTRIBUTES (type));
1273 aarch64_simd_types_trees[i].itype = type;
1276 tdecl = add_builtin_type (aarch64_simd_types[i].name,
1277 aarch64_simd_types_trees[i].itype);
1278 TYPE_NAME (aarch64_simd_types_trees[i].itype) = tdecl;
1281 #define AARCH64_BUILD_SIGNED_TYPE(mode) \
1282 make_signed_type (GET_MODE_PRECISION (mode));
1283 aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
1284 aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
1285 aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
1286 #undef AARCH64_BUILD_SIGNED_TYPE
1288 tdecl = add_builtin_type
1289 ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
1290 TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
1291 tdecl = add_builtin_type
1292 ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
1293 TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
1294 tdecl = add_builtin_type
1295 ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
1296 TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
1299 static void
1300 aarch64_init_simd_builtin_scalar_types (void)
1302 /* Define typedefs for all the standard scalar types. */
1303 (*lang_hooks.types.register_builtin_type) (intQI_type_node,
1304 "__builtin_aarch64_simd_qi");
1305 (*lang_hooks.types.register_builtin_type) (intHI_type_node,
1306 "__builtin_aarch64_simd_hi");
1307 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
1308 "__builtin_aarch64_simd_hf");
1309 (*lang_hooks.types.register_builtin_type) (intSI_type_node,
1310 "__builtin_aarch64_simd_si");
1311 (*lang_hooks.types.register_builtin_type) (float_type_node,
1312 "__builtin_aarch64_simd_sf");
1313 (*lang_hooks.types.register_builtin_type) (intDI_type_node,
1314 "__builtin_aarch64_simd_di");
1315 (*lang_hooks.types.register_builtin_type) (double_type_node,
1316 "__builtin_aarch64_simd_df");
1317 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1318 "__builtin_aarch64_simd_poly8");
1319 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1320 "__builtin_aarch64_simd_poly16");
1321 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1322 "__builtin_aarch64_simd_poly64");
1323 (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
1324 "__builtin_aarch64_simd_poly128");
1325 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
1326 "__builtin_aarch64_simd_ti");
1327 (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
1328 "__builtin_aarch64_simd_bf");
1329 /* Unsigned integer types for various mode sizes. */
1330 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1331 "__builtin_aarch64_simd_uqi");
1332 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1333 "__builtin_aarch64_simd_uhi");
1334 (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
1335 "__builtin_aarch64_simd_usi");
1336 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1337 "__builtin_aarch64_simd_udi");
1340 /* If MODE is a single Advanced SIMD vector, return the number of lanes in the
1341 vector. If MODE is an Advanced SIMD structure/tuple mode, return the number
1342 of lanes in a single vector. */
1343 static unsigned int
1344 aarch64_num_lanes (machine_mode mode)
1346 unsigned int nregs = targetm.hard_regno_nregs (V0_REGNUM, mode);
1347 return exact_div (GET_MODE_NUNITS (mode), nregs).to_constant ();
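/* For example, V4SImode occupies a single vector register, so this returns
   its four lanes directly; the tuple mode V2x4SImode spans two registers
   and eight elements in total, so it likewise yields four lanes per
   vector.  */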
1350 /* Return a set of FLAG_* flags derived from FLAGS
1351 that describe what a function with result MODE could do,
1352 taking the command-line flags into account. */
1353 static unsigned int
1354 aarch64_call_properties (unsigned int flags, machine_mode mode)
1356 if ((flags & FLAG_AUTO_FP) && FLOAT_MODE_P (mode))
1357 flags |= FLAG_FP;
1359 /* -fno-trapping-math means that we can assume any FP exceptions
1360 are not user-visible. */
1361 if (!flag_trapping_math)
1362 flags &= ~FLAG_RAISE_FP_EXCEPTIONS;
1364 return flags;
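/* For instance, a builtin declared with FLAG_DEFAULT (i.e. FLAG_AUTO_FP) and
   a floating-point result mode such as V4SFmode is treated as reading the
   FPCR and possibly raising FP exceptions, whereas the same flags with an
   integer mode add nothing; under -fno-trapping-math the
   FLAG_RAISE_FP_EXCEPTIONS bit is cleared again.  */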
1367 /* Return true if calls to a function with flags F and mode MODE
1368 could modify some form of global state. */
1369 static bool
1370 aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
1372 unsigned int flags = aarch64_call_properties (f, mode);
1374 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1375 return true;
1377 if (flags & FLAG_PREFETCH_MEMORY)
1378 return true;
1380 return flags & FLAG_WRITE_MEMORY;
1383 /* Return true if calls to a function with flags F and mode MODE
1384 could read some form of global state. */
1385 static bool
1386 aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
1388 unsigned int flags = aarch64_call_properties (f, mode);
1390 if (flags & FLAG_READ_FPCR)
1391 return true;
1393 return flags & FLAG_READ_MEMORY;
1396 /* Return true if calls to a function with flags F and mode MODE
1397 could raise a signal. */
1398 static bool
1399 aarch64_could_trap_p (unsigned int f, machine_mode mode)
1401 unsigned int flags = aarch64_call_properties (f, mode);
1403 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1404 return true;
1406 if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY))
1407 return true;
1409 return false;
1412 /* Add attribute NAME to ATTRS. */
1413 static tree
1414 aarch64_add_attribute (const char *name, tree attrs)
1416 return tree_cons (get_identifier (name), NULL_TREE, attrs);
1419 /* Return the appropriate attributes for a function that has
1420 flags F and mode MODE. */
1421 static tree
1422 aarch64_get_attributes (unsigned int f, machine_mode mode)
1424 tree attrs = NULL_TREE;
1426 if (!aarch64_modifies_global_state_p (f, mode))
1428 if (aarch64_reads_global_state_p (f, mode))
1429 attrs = aarch64_add_attribute ("pure", attrs);
1430 else
1431 attrs = aarch64_add_attribute ("const", attrs);
1434 if (!flag_non_call_exceptions || !aarch64_could_trap_p (f, mode))
1435 attrs = aarch64_add_attribute ("nothrow", attrs);
1437 return aarch64_add_attribute ("leaf", attrs);
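/* So a FLAG_QUIET builtin with an integer result mode ends up "const",
   "nothrow" and "leaf", while a FLAG_FP builtin keeps its FPCR read and
   possible FP exceptions and is therefore neither "const" nor "pure" when
   such exceptions are user-visible.  */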
1440 /* Because the architecture does not provide a lane variant of the lane
1441 instructions for fcmla, we can't use the standard SIMD builtin expansion
1442 code, but we still want the majority of the validation normally done. */
1444 void
1445 aarch64_init_fcmla_laneq_builtins (void)
1447 unsigned int i = 0;
1449 for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
1451 aarch64_fcmla_laneq_builtin_datum* d
1452 = &aarch64_fcmla_lane_builtin_data[i];
1453 tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_none);
1454 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
1455 tree quadtype = aarch64_simd_builtin_type (quadmode, qualifier_none);
1456 tree lanetype
1457 = aarch64_simd_builtin_type (SImode, qualifier_lane_pair_index);
1458 tree ftype = build_function_type_list (argtype, argtype, argtype,
1459 quadtype, lanetype, NULL_TREE);
1460 tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
1461 tree fndecl
1462 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1464 aarch64_builtin_decls[d->fcode] = fndecl;
1468 void
1469 aarch64_init_simd_intrinsics (void)
1471 unsigned int i = 0;
1473 for (i = 0; i < ARRAY_SIZE (aarch64_simd_intrinsic_data); ++i)
1475 auto d = &aarch64_simd_intrinsic_data[i];
1477 if (d->skip)
1478 continue;
1480 tree return_type = void_type_node;
1481 tree args = void_list_node;
1483 for (int op_num = d->op_count - 1; op_num >= 0; op_num--)
1485 machine_mode op_mode = d->op_modes[op_num];
1486 enum aarch64_type_qualifiers qualifiers = d->qualifiers[op_num];
1488 tree eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
1490 if (op_num == 0)
1491 return_type = eltype;
1492 else
1493 args = tree_cons (NULL_TREE, eltype, args);
1496 tree ftype = build_function_type (return_type, args);
1497 tree attrs = aarch64_get_attributes (d->flags, d->op_modes[0]);
1498 unsigned int code
1499 = (d->fcode << AARCH64_BUILTIN_SHIFT | AARCH64_BUILTIN_GENERAL);
1500 tree fndecl = simulate_builtin_function_decl (input_location, d->name,
1501 ftype, code, NULL, attrs);
1502 aarch64_builtin_decls[d->fcode] = fndecl;
1506 void
1507 aarch64_init_simd_builtin_functions (bool called_from_pragma)
1509 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
1511 if (!called_from_pragma)
1513 tree lane_check_fpr = build_function_type_list (void_type_node,
1514 size_type_node,
1515 size_type_node,
1516 intSI_type_node,
1517 NULL);
1518 /* aarch64_im_lane_boundsi should be leaf and nothrow as it
1519 is expanded to a nop or will cause a user error. */
1520 tree attrs = aarch64_add_attribute ("nothrow", NULL_TREE);
1521 attrs = aarch64_add_attribute ("leaf", attrs);
1522 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
1523 = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
1524 lane_check_fpr,
1525 AARCH64_SIMD_BUILTIN_LANE_CHECK, attrs);
1528 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
1530 bool print_type_signature_p = false;
1531 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
1532 aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
1533 char namebuf[60];
1534 tree ftype = NULL;
1535 tree fndecl = NULL;
1537 d->fcode = fcode;
1539 /* We must track two variables here. op_num is
1540 the operand number as in the RTL pattern. This is
1541 required to access the mode (e.g. V4SF mode) of the
1542 argument, from which the base type can be derived.
1543 arg_num is an index into the qualifiers data, which
1544 gives qualifiers to the type (e.g. const unsigned).
1545 The reason these two variables may differ by one is the
1546 void return type. While all return types take the 0th entry
1547 in the qualifiers array, there is no operand for them in the
1548 RTL pattern. */
1549 int op_num = insn_data[d->code].n_operands - 1;
1550 int arg_num = d->qualifiers[0] & qualifier_void
1551 ? op_num + 1
1552 : op_num;
1553 tree return_type = void_type_node, args = void_list_node;
1554 tree eltype;
1556 int struct_mode_args = 0;
1557 for (int j = op_num; j >= 0; j--)
1559 machine_mode op_mode = insn_data[d->code].operand[j].mode;
1560 if (aarch64_advsimd_struct_mode_p (op_mode))
1561 struct_mode_args++;
1564 if ((called_from_pragma && struct_mode_args == 0)
1565 || (!called_from_pragma && struct_mode_args > 0))
1566 continue;
1568 /* Build a function type directly from the insn_data for this
1569 builtin. The build_function_type () function takes care of
1570 removing duplicates for us. */
1571 for (; op_num >= 0; arg_num--, op_num--)
1573 machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
1574 enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];
1576 if (qualifiers & qualifier_unsigned)
1578 type_signature[op_num] = 'u';
1579 print_type_signature_p = true;
1581 else if (qualifiers & qualifier_poly)
1583 type_signature[op_num] = 'p';
1584 print_type_signature_p = true;
1586 else
1587 type_signature[op_num] = 's';
1589 /* Some builtins have different user-facing types
1590 for certain arguments, encoded in d->mode. */
1591 if (qualifiers & qualifier_map_mode)
1592 op_mode = d->mode;
1594 eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
1596 /* If we have reached arg_num == 0, we are at a non-void
1597 return type. Otherwise, we are still processing
1598 arguments. */
1599 if (arg_num == 0)
1600 return_type = eltype;
1601 else
1602 args = tree_cons (NULL_TREE, eltype, args);
1605 ftype = build_function_type (return_type, args);
1607 gcc_assert (ftype != NULL);
1609 if (print_type_signature_p)
1610 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
1611 d->name, type_signature);
1612 else
1613 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
1614 d->name);
1616 tree attrs = aarch64_get_attributes (d->flags, d->mode);
1618 if (called_from_pragma)
1620 unsigned int raw_code
1621 = (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
1622 fndecl = simulate_builtin_function_decl (input_location, namebuf,
1623 ftype, raw_code, NULL,
1624 attrs);
1626 else
1627 fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
1629 aarch64_builtin_decls[fcode] = fndecl;
1633 enum class aarch64_builtin_signatures
1635 binary,
1636 binary_lane,
1637 binary_two_lanes,
1638 load,
1639 load_lane,
1640 store,
1641 store_lane,
1642 ternary,
1643 ternary_lane,
1644 unary,
1645 unary_lane,
1648 namespace {
1650 /* Pairs a machine mode with the information needed to turn it into a
1651 function argument type or return type. */
1652 struct simd_type {
1653 tree type () const { return aarch64_simd_builtin_type (mode, qualifiers); }
1654 unsigned nunits () const { return GET_MODE_NUNITS (mode).to_constant (); }
1656 machine_mode mode;
1657 aarch64_type_qualifiers qualifiers;
1660 namespace simd_types {
1661 #define VARIANTS(BASE, D, Q, MODE, QUALIFIERS) \
1662 constexpr simd_type BASE { V##D##MODE, QUALIFIERS }; \
1663 constexpr simd_type BASE##x2 { V2x##D##MODE, QUALIFIERS }; \
1664 constexpr simd_type BASE##x3 { V3x##D##MODE, QUALIFIERS }; \
1665 constexpr simd_type BASE##x4 { V4x##D##MODE, QUALIFIERS }; \
1666 constexpr simd_type BASE##q { V##Q##MODE, QUALIFIERS }; \
1667 constexpr simd_type BASE##qx2 { V2x##Q##MODE, QUALIFIERS }; \
1668 constexpr simd_type BASE##qx3 { V3x##Q##MODE, QUALIFIERS }; \
1669 constexpr simd_type BASE##qx4 { V4x##Q##MODE, QUALIFIERS }; \
1670 constexpr simd_type BASE##_scalar { MODE, QUALIFIERS }; \
1671 constexpr simd_type BASE##_scalar_ptr \
1672 { MODE, aarch64_type_qualifiers (QUALIFIERS | qualifier_pointer) }; \
1673 constexpr simd_type BASE##_scalar_const_ptr \
1674 { MODE, aarch64_type_qualifiers (QUALIFIERS | qualifier_const_pointer) };
1676 VARIANTS (mf8, 8, 16, QImode, qualifier_modal_float)
1677 VARIANTS (p8, 8, 16, QImode, qualifier_poly)
1678 VARIANTS (s8, 8, 16, QImode, qualifier_none)
1679 VARIANTS (u8, 8, 16, QImode, qualifier_unsigned)
1681 VARIANTS (bf16, 4, 8, BFmode, qualifier_none)
1682 VARIANTS (f16, 4, 8, HFmode, qualifier_none)
1683 VARIANTS (p16, 4, 8, HImode, qualifier_poly)
1684 VARIANTS (s16, 4, 8, HImode, qualifier_none)
1685 VARIANTS (u16, 4, 8, HImode, qualifier_unsigned)
1687 VARIANTS (f32, 2, 4, SFmode, qualifier_none)
1688 VARIANTS (p32, 2, 4, SImode, qualifier_poly)
1689 VARIANTS (s32, 2, 4, SImode, qualifier_none)
1690 VARIANTS (u32, 2, 4, SImode, qualifier_unsigned)
1692 VARIANTS (f64, 1, 2, DFmode, qualifier_none)
1693 VARIANTS (p64, 1, 2, DImode, qualifier_poly)
1694 VARIANTS (s64, 1, 2, DImode, qualifier_none)
1695 VARIANTS (u64, 1, 2, DImode, qualifier_unsigned)
1697 constexpr simd_type none { VOIDmode, qualifier_none };
1698 #undef VARIANTS
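/* As an illustration, VARIANTS (s32, 2, 4, SImode, qualifier_none) above
   expands to (among others):

     constexpr simd_type s32 { V2SImode, qualifier_none };
     constexpr simd_type s32q { V4SImode, qualifier_none };
     constexpr simd_type s32_scalar { SImode, qualifier_none };
     constexpr simd_type s32_scalar_const_ptr
       { SImode, aarch64_type_qualifiers (qualifier_none
                                          | qualifier_const_pointer) };

   plus the x2/x3/x4 tuple forms, corresponding to the usual int32x2_t,
   int32x4_t, ... AdvSIMD types.  */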
1703 #undef ENTRY
1704 #define ENTRY(N, S, T0, T1, T2, T3, U, F) \
1705 {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
1706 simd_types::T2, simd_types::T3, U, \
1707 aarch64_required_extensions::REQUIRED_EXTENSIONS, FLAG_##F},
1709 /* Initialize pragma builtins. */
1711 struct aarch64_pragma_builtins_data
1713 const char *name;
1714 aarch64_builtin_signatures signature;
1715 simd_type types[4];
1716 int unspec;
1717 aarch64_required_extensions required_extensions;
1718 unsigned int flags;
1721 static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
1722 #include "aarch64-simd-pragma-builtins.def"
1725 /* Return the function type for BUILTIN_DATA. */
1726 static tree
1727 aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
1729 tree return_type = NULL_TREE;
1730 auto_vec<tree, 8> arg_types;
1731 switch (builtin_data.signature)
1733 case aarch64_builtin_signatures::binary:
1734 case aarch64_builtin_signatures::binary_lane:
1735 case aarch64_builtin_signatures::load_lane:
1736 return_type = builtin_data.types[0].type ();
1737 for (int i = 1; i <= 2; ++i)
1738 arg_types.quick_push (builtin_data.types[i].type ());
1739 break;
1741 case aarch64_builtin_signatures::binary_two_lanes:
1742 /* binary_two_lanes has to be handled as a special case because the lane
1743 indices are interleaved with the vector arguments. */
1744 return_type = builtin_data.types[0].type ();
1745 arg_types.quick_push (builtin_data.types[1].type ());
1746 arg_types.quick_push (integer_type_node);
1747 arg_types.quick_push (builtin_data.types[2].type ());
1748 arg_types.quick_push (integer_type_node);
1749 break;
1751 case aarch64_builtin_signatures::load:
1752 case aarch64_builtin_signatures::unary:
1753 case aarch64_builtin_signatures::unary_lane:
1754 return_type = builtin_data.types[0].type ();
1755 arg_types.quick_push (builtin_data.types[1].type ());
1756 break;
1758 case aarch64_builtin_signatures::store:
1759 case aarch64_builtin_signatures::store_lane:
1760 return_type = void_type_node;
1761 for (int i = 0; i <= 1; ++i)
1762 arg_types.quick_push (builtin_data.types[i].type ());
1763 break;
1765 case aarch64_builtin_signatures::ternary:
1766 case aarch64_builtin_signatures::ternary_lane:
1767 return_type = builtin_data.types[0].type ();
1768 for (int i = 1; i <= 3; ++i)
1769 arg_types.quick_push (builtin_data.types[i].type ());
1770 break;
1772 switch (builtin_data.signature)
1774 case aarch64_builtin_signatures::binary_lane:
1775 case aarch64_builtin_signatures::load_lane:
1776 case aarch64_builtin_signatures::store_lane:
1777 case aarch64_builtin_signatures::ternary_lane:
1778 case aarch64_builtin_signatures::unary_lane:
1779 arg_types.quick_push (integer_type_node);
1780 break;
1782 default:
1783 break;
1785 if (builtin_data.flags & FLAG_USES_FPMR)
1786 arg_types.quick_push (uint64_type_node);
1787 return build_function_type_array (return_type, arg_types.length (),
1788 arg_types.address ());
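/* For example, a hypothetical entry with signature "binary" and types
   { s32q, s32q, s32q } would produce the type
   int32x4_t (*) (int32x4_t, int32x4_t); the "binary_lane" form adds a
   trailing int for the lane index, and FLAG_USES_FPMR adds a trailing
   uint64_t.  (Illustrative only; the real entries come from
   aarch64-simd-pragma-builtins.def.)  */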
1791 /* Simulate function definitions for all of the builtins in
1792 aarch64_pragma_builtins. */
1793 static void
1794 aarch64_init_pragma_builtins ()
1796 for (size_t i = 0; i < ARRAY_SIZE (aarch64_pragma_builtins); ++i)
1798 auto data = aarch64_pragma_builtins[i];
1799 auto fntype = aarch64_fntype (data);
1800 auto code = AARCH64_PRAGMA_BUILTIN_START + i + 1;
1801 auto flag_mode = data.types[0].mode;
1802 auto attrs = aarch64_get_attributes (data.flags, flag_mode);
1803 aarch64_builtin_decls[code]
1804 = aarch64_general_simulate_builtin (data.name, fntype, code, attrs);
1808 /* If the builtin function with code CODE has an entry in
1809 aarch64_pragma_builtins, return its entry, otherwise return null. */
1811 static const aarch64_pragma_builtins_data*
1812 aarch64_get_pragma_builtin (int code)
1814 if (!(code > AARCH64_PRAGMA_BUILTIN_START
1815 && code < AARCH64_PRAGMA_BUILTIN_END))
1816 return NULL;
1818 auto idx = code - (AARCH64_PRAGMA_BUILTIN_START + 1);
1819 return &aarch64_pragma_builtins[idx];
1822 /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
1823 indexed by TYPE_INDEX. */
1824 static void
1825 register_tuple_type (unsigned int num_vectors, unsigned int type_index)
1827 const aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
1828 aarch64_simd_type_info_trees *trees = &aarch64_simd_types_trees[type_index];
1830 /* Synthesize the name of the user-visible vector tuple type. */
1831 const char *vector_type_name = type->name;
1832 char tuple_type_name[sizeof ("bfloat16x4x2_t")];
1833 snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
1834 (int) strlen (vector_type_name) - 4, vector_type_name + 2,
1835 num_vectors);
1836 tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
1838 tree vector_type = trees->itype;
1839 tree array_type = build_array_type_nelts (vector_type, num_vectors);
1840 if (type->mode == DImode)
1842 if (num_vectors == 2)
1843 SET_TYPE_MODE (array_type, V2x1DImode);
1844 else if (num_vectors == 3)
1845 SET_TYPE_MODE (array_type, V3x1DImode);
1846 else if (num_vectors == 4)
1847 SET_TYPE_MODE (array_type, V4x1DImode);
1850 unsigned int alignment
1851 = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
1852 machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
1853 gcc_assert (VECTOR_MODE_P (tuple_mode)
1854 && TYPE_MODE (array_type) == tuple_mode
1855 && TYPE_ALIGN (array_type) == alignment);
1857 tree field = build_decl (input_location, FIELD_DECL,
1858 get_identifier ("val"), array_type);
1860 tree t = lang_hooks.types.simulate_record_decl (input_location,
1861 tuple_type_name,
1862 make_array_slice (&field,
1863 1));
1864 gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
1865 && (flag_pack_struct
1866 || maximum_field_alignment
1867 || (TYPE_MODE_RAW (t) == tuple_mode
1868 && TYPE_ALIGN (t) == alignment)));
1870 aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
1871 aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
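/* For example, for the vector type named "__Int32x4_t" and NUM_VECTORS of
   3, this synthesizes the user-visible name "int32x4x3_t" and a record
   equivalent to

     typedef struct { int32x4_t val[3]; } int32x4x3_t;

   whose mode is the V3x4SImode tuple mode.  (A sketch of the effect; the
   internal type names come from aarch64_simd_types.)  */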
1874 static bool
1875 aarch64_scalar_builtin_type_p (aarch64_simd_type t)
1877 return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
1880 /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
1881 set. */
1882 aarch64_simd_switcher::aarch64_simd_switcher (aarch64_feature_flags extra_flags)
1883 : m_old_asm_isa_flags (aarch64_asm_isa_flags),
1884 m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
1886 /* Changing the ISA flags should be enough here. We shouldn't need to
1887 pay the compile-time cost of a full target switch. */
1888 global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
1889 aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
1892 aarch64_simd_switcher::~aarch64_simd_switcher ()
1894 if (m_old_general_regs_only)
1895 global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
1896 aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
1899 /* Implement #pragma GCC aarch64 "arm_neon.h".
1901 The types and functions defined here need to be available internally
1902 during LTO as well. */
1903 void
1904 handle_arm_neon_h (void)
1906 aarch64_simd_switcher simd;
1908 /* Register the AdvSIMD vector tuple types. */
1909 for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
1910 for (unsigned int count = 2; count <= 4; ++count)
1911 if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
1912 register_tuple_type (count, i);
1914 aarch64_init_simd_builtin_functions (true);
1915 aarch64_init_simd_intrinsics ();
1916 aarch64_init_pragma_builtins ();
1919 static void
1920 aarch64_init_simd_builtins (void)
1922 aarch64_init_simd_builtin_types ();
1924 /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
1925 Therefore we need to preserve the old __builtin scalar types. They can be
1926 removed once all the intrinsics become strongly typed using the qualifier
1927 system. */
1928 aarch64_init_simd_builtin_scalar_types ();
1930 aarch64_init_simd_builtin_functions (false);
1931 if (in_lto_p)
1932 handle_arm_neon_h ();
1934 /* Initialize the remaining fcmla_laneq intrinsics. */
1935 aarch64_init_fcmla_laneq_builtins ();
1938 static void
1939 aarch64_init_crc32_builtins ()
1941 tree usi_type = aarch64_simd_builtin_type (SImode, qualifier_unsigned);
1942 unsigned int i = 0;
1944 for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
1946 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
1947 tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_unsigned);
1948 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
1949 tree attrs = aarch64_get_attributes (FLAG_DEFAULT, d->mode);
1950 tree fndecl
1951 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1953 aarch64_builtin_decls[d->fcode] = fndecl;
1957 /* Add builtins for reciprocal square root. */
1959 void
1960 aarch64_init_builtin_rsqrt (void)
1962 tree fndecl = NULL;
1963 tree ftype = NULL;
1965 tree V2SF_type_node = build_vector_type (float_type_node, 2);
1966 tree V2DF_type_node = build_vector_type (double_type_node, 2);
1967 tree V4SF_type_node = build_vector_type (float_type_node, 4);
1969 struct builtin_decls_data
1971 tree type_node;
1972 const char *builtin_name;
1973 int function_code;
1976 builtin_decls_data bdda[] =
1978 { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
1979 { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
1980 { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
1981 { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
1982 { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
1985 builtin_decls_data *bdd = bdda;
1986 builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));
1988 for (; bdd < bdd_end; bdd++)
1990 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
1991 tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
1992 fndecl = aarch64_general_add_builtin (bdd->builtin_name,
1993 ftype, bdd->function_code, attrs);
1994 aarch64_builtin_decls[bdd->function_code] = fndecl;
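/* A minimal usage sketch, assuming the reciprocal square root estimate
   semantics (an approximation of 1.0 / sqrt (x)):

     double approx_rsqrt (double x)
     {
       return __builtin_aarch64_rsqrt_df (x);
     }

   The vector variants behave the same way on V2SF, V4SF and V2DF values.  */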
1998 /* Initialize the backend type that supports the user-visible __mfp8
1999 type and its associated pointer type. */
2001 static void
2002 aarch64_init_fp8_types (void)
2004 aarch64_mfp8_type_node = make_unsigned_type (8);
2005 SET_TYPE_MODE (aarch64_mfp8_type_node, QImode);
2007 lang_hooks.types.register_builtin_type (aarch64_mfp8_type_node, "__mfp8");
2008 aarch64_mfp8_ptr_type_node = build_pointer_type (aarch64_mfp8_type_node);
2011 /* Initialize the backend types that support the user-visible __fp16
2012 type, also initialize a pointer to that type, to be used when
2013 forming HFAs. */
2015 static void
2016 aarch64_init_fp16_types (void)
2018 aarch64_fp16_type_node = make_node (REAL_TYPE);
2019 TYPE_PRECISION (aarch64_fp16_type_node) = 16;
2020 layout_type (aarch64_fp16_type_node);
2022 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
2023 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
2026 /* Initialize the backend REAL_TYPE type supporting bfloat types. */
2027 static void
2028 aarch64_init_bf16_types (void)
2030 lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
2031 aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
2034 /* Pointer authentication builtins that will become NOPs on legacy platforms.
2035 Currently, these builtins are for internal use only (libgcc EH unwinder). */
2037 void
2038 aarch64_init_pauth_hint_builtins (void)
2040 /* Pointer Authentication builtins. */
2041 tree ftype_pointer_auth
2042 = build_function_type_list (ptr_type_node, ptr_type_node,
2043 unsigned_intDI_type_node, NULL_TREE);
2044 tree ftype_pointer_strip
2045 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
2047 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
2048 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
2049 ftype_pointer_auth,
2050 AARCH64_PAUTH_BUILTIN_AUTIA1716);
2051 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
2052 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
2053 ftype_pointer_auth,
2054 AARCH64_PAUTH_BUILTIN_PACIA1716);
2055 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
2056 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
2057 ftype_pointer_auth,
2058 AARCH64_PAUTH_BUILTIN_AUTIB1716);
2059 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
2060 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
2061 ftype_pointer_auth,
2062 AARCH64_PAUTH_BUILTIN_PACIB1716);
2063 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
2064 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
2065 ftype_pointer_strip,
2066 AARCH64_PAUTH_BUILTIN_XPACLRI);
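/* A sketch of the intended libgcc-internal usage, assuming the
   architectural AUTIA1716/XPACLRI semantics: the two-argument builtins
   take the signed pointer and a 64-bit modifier, while the strip builtin
   simply removes the PAC bits:

     void *stripped = __builtin_aarch64_xpaclri (ret_addr);
     void *authed = __builtin_aarch64_autia1716 (ret_addr, (uint64_t) sp);  */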
2069 /* Initialize the transactional memory extension (TME) builtins. */
2070 static void
2071 aarch64_init_tme_builtins (void)
2073 tree ftype_uint64_void
2074 = build_function_type_list (uint64_type_node, NULL);
2075 tree ftype_void_void
2076 = build_function_type_list (void_type_node, NULL);
2077 tree ftype_void_uint64
2078 = build_function_type_list (void_type_node, uint64_type_node, NULL);
2080 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
2081 = aarch64_general_simulate_builtin ("__tstart", ftype_uint64_void,
2082 AARCH64_TME_BUILTIN_TSTART);
2083 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
2084 = aarch64_general_simulate_builtin ("__ttest", ftype_uint64_void,
2085 AARCH64_TME_BUILTIN_TTEST);
2086 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
2087 = aarch64_general_simulate_builtin ("__tcommit", ftype_void_void,
2088 AARCH64_TME_BUILTIN_TCOMMIT);
2089 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
2090 = aarch64_general_simulate_builtin ("__tcancel", ftype_void_uint64,
2091 AARCH64_TME_BUILTIN_TCANCEL);
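/* A usage sketch for the simulated TME functions above, assuming the ACLE
   convention that __tstart returns zero when a transaction starts and the
   failure reason otherwise:

     uint64_t status = __tstart ();
     if (status == 0)
       __tcommit ();
     else
       handle_failure (status);

   where handle_failure is a hypothetical fallback path.  __tcancel takes a
   16-bit immediate reason code and __ttest reports whether a transaction
   is currently active.  */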
2094 /* Add builtins for Random Number instructions. */
2096 static void
2097 aarch64_init_rng_builtins (void)
2099 tree unsigned_ptr_type
2100 = build_pointer_type (get_typenode_from_name (UINT64_TYPE));
2101 tree ftype
2102 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
2103 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
2104 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
2105 AARCH64_BUILTIN_RNG_RNDR);
2106 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
2107 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
2108 AARCH64_BUILTIN_RNG_RNDRRS);
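/* A usage sketch, assuming the ACLE convention that a zero return value
   means a valid random number was written through the pointer:

     uint64_t value;
     int ok = __builtin_aarch64_rndr (&value);

   where ok is zero when value holds a valid random number.
   __builtin_aarch64_rndrrs behaves the same but uses a reseeded
   generator (RNDRRS).  */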
2111 /* Add builtins for reading system register. */
2112 static void
2113 aarch64_init_rwsr_builtins (void)
2115 tree fntype = NULL;
2116 tree const_char_ptr_type
2117 = build_pointer_type (build_type_variant (char_type_node, true, false));
2119 #define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
2120 aarch64_builtin_decls[AARCH64_##F] \
2121 = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
2123 fntype
2124 = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
2125 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
2127 fntype
2128 = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
2129 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
2131 fntype
2132 = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
2133 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
2135 fntype
2136 = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
2137 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
2139 fntype
2140 = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
2141 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
2143 fntype
2144 = build_function_type_list (uint128_type_node, const_char_ptr_type, NULL);
2145 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR128, rsr128, fntype);
2147 fntype
2148 = build_function_type_list (void_type_node, const_char_ptr_type,
2149 uint32_type_node, NULL);
2151 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
2153 fntype
2154 = build_function_type_list (void_type_node, const_char_ptr_type,
2155 const_ptr_type_node, NULL);
2156 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
2158 fntype
2159 = build_function_type_list (void_type_node, const_char_ptr_type,
2160 uint64_type_node, NULL);
2161 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
2163 fntype
2164 = build_function_type_list (void_type_node, const_char_ptr_type,
2165 float_type_node, NULL);
2166 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
2168 fntype
2169 = build_function_type_list (void_type_node, const_char_ptr_type,
2170 double_type_node, NULL);
2171 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
2173 fntype
2174 = build_function_type_list (void_type_node, const_char_ptr_type,
2175 uint128_type_node, NULL);
2176 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype);
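/* A usage sketch; the first argument must be a string literal naming the
   system register, which aarch64_general_check_builtin_call enforces
   below:

     uint64_t tpidr = __builtin_aarch64_rsr64 ("tpidr_el0");
     __builtin_aarch64_wsr64 ("tpidr_el0", tpidr + 1);

   The rsr/wsr, rsrf/wsrf and rsr128/wsr128 variants differ only in the
   type of the value read or written.  */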
2179 /* Add builtins for data and instruction prefetch. */
2180 static void
2181 aarch64_init_prefetch_builtin (void)
2183 #define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
2184 aarch64_builtin_decls[INDEX] = \
2185 aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX, \
2186 prefetch_attrs)
2188 tree ftype;
2189 tree cv_argtype;
2190 tree prefetch_attrs = aarch64_get_attributes (FLAG_PREFETCH_MEMORY, DImode);
2191 cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
2192 | TYPE_QUAL_VOLATILE);
2193 cv_argtype = build_pointer_type (cv_argtype);
2195 ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
2196 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
2197 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
2199 ftype = build_function_type_list (void_type_node, unsigned_type_node,
2200 unsigned_type_node, unsigned_type_node,
2201 cv_argtype, NULL);
2202 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
2204 ftype = build_function_type_list (void_type_node, unsigned_type_node,
2205 unsigned_type_node, cv_argtype, NULL);
2206 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
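/* A usage sketch, assuming the __pld/__pldx-style operand order implied by
   the function types above (address only for pld/pli; extra unsigned
   operands selecting the prefetch variant before the address for
   pldx/plix):

     __builtin_aarch64_pld (ptr);
     __builtin_aarch64_pldx (0, 0, 0, ptr);

   where the leading zero operands are illustrative only.  */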
2209 /* Initialize the memory tagging extension (MTE) builtins. */
2210 static GTY(()) struct GTY(())
2212 tree ftype;
2213 enum insn_code icode;
2214 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
2215 AARCH64_MEMTAG_BUILTIN_START - 1];
2217 static void
2218 aarch64_init_memtag_builtins (void)
2220 tree fntype = NULL;
2222 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
2223 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
2224 = aarch64_general_simulate_builtin ("__arm_mte_"#N, T, \
2225 AARCH64_MEMTAG_BUILTIN_##F); \
2226 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
2227 AARCH64_MEMTAG_BUILTIN_START - 1] = \
2228 {T, CODE_FOR_##I};
2230 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
2231 uint64_type_node, NULL);
2232 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, create_random_tag, irg, fntype);
2234 fntype = build_function_type_list (uint64_type_node, ptr_type_node,
2235 uint64_type_node, NULL);
2236 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, exclude_tag, gmi, fntype);
2238 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
2239 ptr_type_node, NULL);
2240 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, ptrdiff, subp, fntype);
2242 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
2243 unsigned_type_node, NULL);
2244 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, increment_tag, addg, fntype);
2246 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
2247 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
2249 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
2250 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);
2252 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
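/* A usage sketch for the simulated MTE functions above, following the ACLE
   memory tagging intrinsics (sketch only):

     void *tagged = __arm_mte_create_random_tag (p, 0);
     __arm_mte_set_tag (tagged);
     void *loaded = __arm_mte_get_tag (tagged);
     ptrdiff_t delta = __arm_mte_ptrdiff (tagged, p);

   set_tag/get_tag operate on one 16-byte granule; the second argument of
   create_random_tag is the excluded-tag mask.  */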
2255 /* Add builtins for Load/store 64 Byte instructions. */
2257 typedef struct
2259 const char *name;
2260 unsigned int code;
2261 tree type;
2262 } ls64_builtins_data;
2264 static GTY(()) tree ls64_arm_data_t = NULL_TREE;
2266 static void
2267 aarch64_init_ls64_builtins_types (void)
2269 /* Synthesize:
2271 typedef struct {
2272 uint64_t val[8];
2273 } __arm_data512_t; */
2274 const char *tuple_type_name = "__arm_data512_t";
2275 tree node_type = get_typenode_from_name (UINT64_TYPE);
2276 tree array_type = build_array_type_nelts (node_type, 8);
2277 SET_TYPE_MODE (array_type, V8DImode);
2279 gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
2280 gcc_assert (TYPE_ALIGN (array_type) == 64);
2282 tree field = build_decl (input_location, FIELD_DECL,
2283 get_identifier ("val"), array_type);
2285 ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
2286 tuple_type_name,
2287 make_array_slice (&field, 1));
2289 gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
2290 gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
2291 gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
2294 static void
2295 aarch64_init_ls64_builtins (void)
2297 aarch64_init_ls64_builtins_types ();
2299 ls64_builtins_data data[4] = {
2300 {"__arm_ld64b", AARCH64_LS64_BUILTIN_LD64B,
2301 build_function_type_list (ls64_arm_data_t,
2302 const_ptr_type_node, NULL_TREE)},
2303 {"__arm_st64b", AARCH64_LS64_BUILTIN_ST64B,
2304 build_function_type_list (void_type_node, ptr_type_node,
2305 ls64_arm_data_t, NULL_TREE)},
2306 {"__arm_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
2307 build_function_type_list (uint64_type_node, ptr_type_node,
2308 ls64_arm_data_t, NULL_TREE)},
2309 {"__arm_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
2310 build_function_type_list (uint64_type_node, ptr_type_node,
2311 ls64_arm_data_t, NULL_TREE)},
2314 for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
2315 aarch64_builtin_decls[data[i].code]
2316 = aarch64_general_simulate_builtin (data[i].name, data[i].type,
2317 data[i].code);
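/* A usage sketch for the LS64 functions above (64-byte loads and stores):

     __arm_data512_t blk = __arm_ld64b (src);
     __arm_st64b (dst, blk);
     uint64_t status = __arm_st64bv (dst, blk);

   where src and dst point to suitably aligned 64-byte regions (typically
   device memory) and status is the returned store status.  */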
2320 static void
2321 aarch64_init_data_intrinsics (void)
2323 /* These intrinsics are not FP, nor do they read/write memory. */
2324 tree attrs = aarch64_get_attributes (FLAG_DEFAULT, SImode);
2325 tree uint32_fntype = build_function_type_list (uint32_type_node,
2326 uint32_type_node, NULL_TREE);
2327 tree ulong_fntype = build_function_type_list (long_unsigned_type_node,
2328 long_unsigned_type_node,
2329 NULL_TREE);
2330 tree uint64_fntype = build_function_type_list (uint64_type_node,
2331 uint64_type_node, NULL_TREE);
2332 aarch64_builtin_decls[AARCH64_REV16]
2333 = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
2334 AARCH64_REV16, attrs);
2335 aarch64_builtin_decls[AARCH64_REV16L]
2336 = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype,
2337 AARCH64_REV16L, attrs);
2338 aarch64_builtin_decls[AARCH64_REV16LL]
2339 = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype,
2340 AARCH64_REV16LL, attrs);
2341 aarch64_builtin_decls[AARCH64_RBIT]
2342 = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
2343 AARCH64_RBIT, attrs);
2344 aarch64_builtin_decls[AARCH64_RBITL]
2345 = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype,
2346 AARCH64_RBITL, attrs);
2347 aarch64_builtin_decls[AARCH64_RBITLL]
2348 = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
2349 AARCH64_RBITLL, attrs);
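/* A usage sketch:

     uint32_t swapped = __builtin_aarch64_rev16 (x);
     uint32_t reversed = __builtin_aarch64_rbit (x);

   rev16 byte-swaps within each 16-bit halfword and rbit reverses the bit
   order; the l/ll variants do the same on wider types.  */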
2352 /* Implement #pragma GCC aarch64 "arm_acle.h". */
2353 void
2354 handle_arm_acle_h (void)
2356 aarch64_init_ls64_builtins ();
2357 aarch64_init_tme_builtins ();
2358 aarch64_init_memtag_builtins ();
2361 /* Initialize fpsr fpcr getters and setters. */
2363 static void
2364 aarch64_init_fpsr_fpcr_builtins (void)
2366 tree ftype_set
2367 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
2368 tree ftype_get
2369 = build_function_type_list (unsigned_type_node, NULL);
2371 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
2372 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
2373 ftype_get,
2374 AARCH64_BUILTIN_GET_FPCR);
2375 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
2376 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
2377 ftype_set,
2378 AARCH64_BUILTIN_SET_FPCR);
2379 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
2380 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
2381 ftype_get,
2382 AARCH64_BUILTIN_GET_FPSR);
2383 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
2384 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
2385 ftype_set,
2386 AARCH64_BUILTIN_SET_FPSR);
2388 ftype_set
2389 = build_function_type_list (void_type_node, long_long_unsigned_type_node,
2390 NULL);
2391 ftype_get
2392 = build_function_type_list (long_long_unsigned_type_node, NULL);
2394 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
2395 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
2396 ftype_get,
2397 AARCH64_BUILTIN_GET_FPCR64);
2398 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
2399 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
2400 ftype_set,
2401 AARCH64_BUILTIN_SET_FPCR64);
2402 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
2403 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
2404 ftype_get,
2405 AARCH64_BUILTIN_GET_FPSR64);
2406 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
2407 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
2408 ftype_set,
2409 AARCH64_BUILTIN_SET_FPSR64);
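/* A usage sketch: a read-modify-write of the 32-bit FPCR view (the *64
   variants operate on the full 64-bit register):

     unsigned int fpcr = __builtin_aarch64_get_fpcr ();
     __builtin_aarch64_set_fpcr (fpcr | some_bit);

   where some_bit stands for a hypothetical FPCR control bit.  */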
2412 /* Add builtins for Guarded Control Stack instructions. */
2414 static void
2415 aarch64_init_gcs_builtins (void)
2417 tree ftype;
2419 ftype = build_function_type_list (ptr_type_node, NULL);
2420 aarch64_builtin_decls[AARCH64_BUILTIN_GCSPR]
2421 = aarch64_general_add_builtin ("__builtin_aarch64_gcspr", ftype,
2422 AARCH64_BUILTIN_GCSPR);
2424 ftype = build_function_type_list (uint64_type_node, NULL);
2425 aarch64_builtin_decls[AARCH64_BUILTIN_GCSPOPM]
2426 = aarch64_general_add_builtin ("__builtin_aarch64_gcspopm", ftype,
2427 AARCH64_BUILTIN_GCSPOPM);
2429 ftype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
2430 aarch64_builtin_decls[AARCH64_BUILTIN_GCSSS]
2431 = aarch64_general_add_builtin ("__builtin_aarch64_gcsss", ftype,
2432 AARCH64_BUILTIN_GCSSS);
2435 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
2437 void
2438 aarch64_general_init_builtins (void)
2440 aarch64_init_fpsr_fpcr_builtins ();
2442 aarch64_init_fp8_types ();
2444 aarch64_init_fp16_types ();
2446 aarch64_init_bf16_types ();
2449 aarch64_simd_switcher simd;
2450 aarch64_init_simd_builtins ();
2453 aarch64_init_crc32_builtins ();
2454 aarch64_init_builtin_rsqrt ();
2455 aarch64_init_rng_builtins ();
2456 aarch64_init_data_intrinsics ();
2458 aarch64_init_rwsr_builtins ();
2459 aarch64_init_prefetch_builtin ();
2461 tree ftype_jcvt
2462 = build_function_type_list (intSI_type_node, double_type_node, NULL);
2463 aarch64_builtin_decls[AARCH64_JSCVT]
2464 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
2465 AARCH64_JSCVT);
2467 /* Initialize pointer authentication builtins which are backed by instructions
2468 in NOP encoding space.
2470 NOTE: these builtins are supposed to be used only by the libgcc unwinder.
2471 Since there is no support for return address signing under ILP32, we
2472 don't register them in that case. */
2473 if (!TARGET_ILP32)
2474 aarch64_init_pauth_hint_builtins ();
2476 tree ftype_chkfeat
2477 = build_function_type_list (uint64_type_node, uint64_type_node, NULL);
2478 aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT]
2479 = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
2480 AARCH64_BUILTIN_CHKFEAT);
2482 aarch64_init_gcs_builtins ();
2484 if (in_lto_p)
2485 handle_arm_acle_h ();
2488 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */
2489 tree
2490 aarch64_general_builtin_decl (unsigned code, bool)
2492 if (code >= AARCH64_BUILTIN_MAX)
2493 return error_mark_node;
2495 return aarch64_builtin_decls[code];
2498 /* True if we've already complained about attempts to use functions
2499 when the required extension is disabled. */
2500 static bool reported_missing_extension_p;
2502 /* True if we've already complained about attempts to use functions
2503 which require registers that are missing. */
2504 static bool reported_missing_registers_p;
2506 /* Report an error against LOCATION that the user has tried to use
2507 function FNDECL when extension EXTENSION is disabled. */
2508 static void
2509 aarch64_report_missing_extension (location_t location, tree fndecl,
2510 const char *extension)
2512 /* Avoid reporting a slew of messages for a single oversight. */
2513 if (reported_missing_extension_p)
2514 return;
2516 error_at (location, "ACLE function %qD requires ISA extension %qs",
2517 fndecl, extension);
2518 inform (location, "you can enable %qs using the command-line"
2519 " option %<-march%>, or by using the %<target%>"
2520 " attribute or pragma", extension);
2521 reported_missing_extension_p = true;
2524 /* Report an error against LOCATION that the user has tried to use
2525 function FNDECL when non-general registers are disabled. */
2526 static void
2527 aarch64_report_missing_registers (location_t location, tree fndecl)
2529 /* Avoid reporting a slew of messages for a single oversight. */
2530 if (reported_missing_registers_p)
2531 return;
2533 error_at (location,
2534 "ACLE function %qD is incompatible with the use of %qs",
2535 fndecl, "-mgeneral-regs-only");
2536 reported_missing_registers_p = true;
2539 /* Check whether the requirements in REQUIRED_EXTENSIONS are met, given that
2540 those requirements come from calling function FNDECL. Report an error
2541 against LOCATION if not. */
2542 bool
2543 aarch64_check_required_extensions (location_t location, tree fndecl,
2544 aarch64_required_extensions
2545 required_extensions)
2547 aarch64_feature_flags sm_state_extensions = 0;
2548 if (!TARGET_STREAMING)
2550 if (required_extensions.sm_off == 0)
2552 error_at (location, "ACLE function %qD can only be called when"
2553 " SME streaming mode is enabled", fndecl);
2554 return false;
2556 sm_state_extensions |= required_extensions.sm_off & ~AARCH64_FL_SM_OFF;
2558 if (!TARGET_NON_STREAMING)
2560 if (required_extensions.sm_on == 0)
2562 error_at (location, "ACLE function %qD cannot be called when"
2563 " SME streaming mode is enabled", fndecl);
2564 return false;
2566 sm_state_extensions |= required_extensions.sm_on & ~AARCH64_FL_SM_ON;
2569 if ((sm_state_extensions & ~aarch64_isa_flags) == 0)
2570 return true;
2572 auto missing_extensions = sm_state_extensions & ~aarch64_asm_isa_flags;
2573 if (missing_extensions == 0)
2575 /* All required extensions are enabled in aarch64_asm_isa_flags, so the
2576 error must be due to the use of general-regs-only. */
2577 aarch64_report_missing_registers (location, fndecl);
2578 return false;
2581 if (missing_extensions & AARCH64_FL_ZA_ON)
2583 error_at (location, "ACLE function %qD can only be called from"
2584 " a function that has %qs state", fndecl, "za");
2585 return false;
2588 static const struct {
2589 aarch64_feature_flags flag;
2590 const char *name;
2591 } extensions[] = {
2592 #define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
2593 { AARCH64_FL_##IDENT, EXT_NAME },
2594 #include "aarch64-option-extensions.def"
2597 for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i)
2598 if (missing_extensions & extensions[i].flag)
2600 aarch64_report_missing_extension (location, fndecl, extensions[i].name);
2601 return false;
2603 gcc_unreachable ();
2606 /* Return the ISA extensions required by function CODE. */
2607 static aarch64_required_extensions
2608 aarch64_general_required_extensions (unsigned int code)
2610 using ext = aarch64_required_extensions;
2611 switch (code)
2613 case AARCH64_TME_BUILTIN_TSTART:
2614 case AARCH64_TME_BUILTIN_TCOMMIT:
2615 case AARCH64_TME_BUILTIN_TTEST:
2616 case AARCH64_TME_BUILTIN_TCANCEL:
2617 return ext::streaming_compatible (AARCH64_FL_TME);
2619 case AARCH64_LS64_BUILTIN_LD64B:
2620 case AARCH64_LS64_BUILTIN_ST64B:
2621 case AARCH64_LS64_BUILTIN_ST64BV:
2622 case AARCH64_LS64_BUILTIN_ST64BV0:
2623 return ext::streaming_compatible (AARCH64_FL_LS64);
2625 default:
2626 if (code >= AARCH64_MEMTAG_BUILTIN_START
2627 && code <= AARCH64_MEMTAG_BUILTIN_END)
2628 return ext::streaming_compatible (AARCH64_FL_MEMTAG);
2630 if (auto builtin_data = aarch64_get_pragma_builtin (code))
2631 return builtin_data->required_extensions;
2633 return ext::streaming_compatible (0);
2636 /* Checks calls to intrinsics that are defined using
2637 aarch64-simd-pragma-builtins.def. */
2638 struct aarch64_pragma_builtins_checker
2640 aarch64_pragma_builtins_checker (location_t, tree, unsigned int, tree *,
2641 const aarch64_pragma_builtins_data &);
2643 bool require_immediate_range (unsigned int, HOST_WIDE_INT,
2644 HOST_WIDE_INT);
2645 bool require_immediate_lane_index (unsigned int, unsigned int, unsigned int);
2646 bool require_immediate_lane_index (unsigned int, unsigned int);
2648 bool check ();
2650 location_t location;
2651 tree fndecl;
2652 unsigned int nargs;
2653 array_slice<tree> args;
2654 const aarch64_pragma_builtins_data &builtin_data;
2657 /* LOCATION is the location of the call; FNDECL is the FUNCTION_DECL
2658 that is being called; NARGS is the number of arguments to the call,
2659 which are in a vector starting at FIRST_ARG; and BUILTIN_DATA describes
2660 the intrinsic. */
2661 aarch64_pragma_builtins_checker::
2662 aarch64_pragma_builtins_checker (location_t location, tree fndecl,
2663 unsigned int nargs, tree *first_arg,
2664 const aarch64_pragma_builtins_data
2665 &builtin_data)
2666 : location (location), fndecl (fndecl), nargs (nargs),
2667 args (first_arg, nargs), builtin_data (builtin_data)
2671 /* Require argument ARGNO to be an integer constant expression in the
2672 range [MIN, MAX]. Return true if it was. */
2673 bool
2674 aarch64_pragma_builtins_checker::
2675 require_immediate_range (unsigned int argno, HOST_WIDE_INT min,
2676 HOST_WIDE_INT max)
2678 if (!tree_fits_shwi_p (args[argno]))
2680 report_non_ice (location, fndecl, argno);
2681 return false;
2684 HOST_WIDE_INT actual = tree_to_shwi (args[argno]);
2685 if (actual < min || actual > max)
2687 report_out_of_range (location, fndecl, argno, actual, min, max);
2688 return false;
2691 return true;
2694 /* Require argument LANE_ARGNO to be an immediate lane index into vector
2695 argument VEC_ARGNO, given that each index selects enough data to fill
2696 one element of argument ELT_ARGNO. Return true if the argument
2697 is valid. */
2698 bool
2699 aarch64_pragma_builtins_checker::
2700 require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno,
2701 unsigned int elt_argno)
2703 auto vec_mode = TYPE_MODE (TREE_TYPE (args[vec_argno]));
2704 auto elt_mode = TYPE_MODE (TREE_TYPE (args[elt_argno]));
2705 auto nunits = (aarch64_num_lanes (vec_mode)
2706 * GET_MODE_UNIT_SIZE (vec_mode)
2707 / GET_MODE_UNIT_SIZE (elt_mode));
2708 return require_immediate_range (lane_argno, 0, nunits - 1);
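/* Worked example of the computation above: for a lane index whose vector
   argument has mode V16QImode (16 one-byte lanes) and whose element
   argument has two-byte elements (e.g. V8HFmode), nunits is
   16 * 1 / 2 = 8, so the immediate must lie in [0, 7].  (Illustrative;
   the actual modes depend on the intrinsic.)  */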
2711 /* Require argument LANE_ARGNO to be an immediate lane index that selects
2712 one element of argument VEC_ARGNO. Return true if the argument
2713 is valid. */
2714 bool
2715 aarch64_pragma_builtins_checker::
2716 require_immediate_lane_index (unsigned int lane_argno, unsigned int vec_argno)
2718 return require_immediate_lane_index (lane_argno, vec_argno, vec_argno);
2721 /* Check the arguments to the intrinsic call and return true if they
2722 are valid. */
2723 bool
2724 aarch64_pragma_builtins_checker::check ()
2726 auto &types = builtin_data.types;
2728 switch (builtin_data.unspec)
2730 case UNSPEC_DUP_LANE:
2731 case UNSPEC_GET_LANE:
2732 case UNSPEC_LD2_LANE:
2733 case UNSPEC_LD3_LANE:
2734 case UNSPEC_LD4_LANE:
2735 case UNSPEC_SET_LANE:
2736 case UNSPEC_ST1_LANE:
2737 case UNSPEC_ST2_LANE:
2738 case UNSPEC_ST3_LANE:
2739 case UNSPEC_ST4_LANE:
2740 return require_immediate_lane_index (nargs - 1, nargs - 2);
2742 case UNSPEC_EXT:
2743 return require_immediate_range (2, 0, types[2].nunits () - 1);
2745 case UNSPEC_FDOT_LANE_FP8:
2746 return require_immediate_lane_index (nargs - 2, nargs - 3, 0);
2748 case UNSPEC_FMLALB_FP8:
2749 case UNSPEC_FMLALT_FP8:
2750 case UNSPEC_FMLALLBB_FP8:
2751 case UNSPEC_FMLALLBT_FP8:
2752 case UNSPEC_FMLALLTB_FP8:
2753 case UNSPEC_FMLALLTT_FP8:
2754 if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
2755 return require_immediate_lane_index (nargs - 2, nargs - 3);
2756 else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
2757 return true;
2758 else
2759 gcc_unreachable ();
2761 case UNSPEC_LUTI2:
2762 case UNSPEC_LUTI4:
2764 auto vector_to_index_nunits = types[nargs - 1].nunits ();
2765 int output_mode_nunits = types[0].nunits ();
2767 int high;
2768 if (builtin_data.unspec == UNSPEC_LUTI2)
2769 high = (4 * vector_to_index_nunits / output_mode_nunits) - 1;
2770 else
2771 high = (2 * vector_to_index_nunits / output_mode_nunits) - 1;
2773 return require_immediate_range (nargs - 1, 0, high);
2776 case UNSPEC_VEC_COPY:
2777 /* & rather than && so that we report errors against both indices. */
2778 return (require_immediate_lane_index (1, 0)
2779 & require_immediate_lane_index (3, 2));
2781 default:
2782 return true;
2786 bool
2787 aarch64_general_check_builtin_call (location_t location, vec<location_t>,
2788 unsigned int code, tree fndecl,
2789 unsigned int nargs, tree *args)
2791 tree decl = aarch64_builtin_decls[code];
2792 auto required_extensions = aarch64_general_required_extensions (code);
2793 if (!aarch64_check_required_extensions (location, decl, required_extensions))
2794 return false;
2796 if (auto builtin_data = aarch64_get_pragma_builtin (code))
2798 aarch64_pragma_builtins_checker checker (location, fndecl, nargs, args,
2799 *builtin_data);
2800 return checker.check ();
2803 switch (code)
2805 case AARCH64_RSR:
2806 case AARCH64_RSRP:
2807 case AARCH64_RSR64:
2808 case AARCH64_RSRF:
2809 case AARCH64_RSRF64:
2810 case AARCH64_WSR:
2811 case AARCH64_WSRP:
2812 case AARCH64_WSR64:
2813 case AARCH64_WSRF:
2814 case AARCH64_WSRF64:
2816 tree addr = STRIP_NOPS (args[0]);
2817 if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
2818 || TREE_CODE (addr) != ADDR_EXPR
2819 || TREE_CODE (TREE_OPERAND (addr, 0)) != STRING_CST)
2821 error_at (location,
2822 "first argument to %qD must be a string literal",
2823 fndecl);
2824 return false;
2826 break;
2830 return true;
2833 typedef enum
2835 SIMD_ARG_COPY_TO_REG,
2836 SIMD_ARG_CONSTANT,
2837 SIMD_ARG_LANE_INDEX,
2838 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
2839 SIMD_ARG_LANE_PAIR_INDEX,
2840 SIMD_ARG_LANE_QUADTUP_INDEX,
2841 SIMD_ARG_STOP
2842 } builtin_simd_arg;
2845 static rtx
2846 aarch64_simd_expand_args (rtx target, int icode, int have_retval,
2847 tree exp, builtin_simd_arg *args,
2848 machine_mode builtin_mode)
2850 rtx pat;
2851 rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */
2852 int opc = 0;
2854 if (have_retval)
2856 machine_mode tmode = insn_data[icode].operand[0].mode;
2857 if (!target
2858 || GET_MODE (target) != tmode
2859 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
2860 target = gen_reg_rtx (tmode);
2861 op[opc++] = target;
2864 for (;;)
2866 builtin_simd_arg thisarg = args[opc - have_retval];
2868 if (thisarg == SIMD_ARG_STOP)
2869 break;
2870 else
2872 tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
2873 machine_mode mode = insn_data[icode].operand[opc].mode;
2874 op[opc] = expand_normal (arg);
2876 switch (thisarg)
2878 case SIMD_ARG_COPY_TO_REG:
2879 if (POINTER_TYPE_P (TREE_TYPE (arg)))
2880 op[opc] = convert_memory_address (Pmode, op[opc]);
2881 /*gcc_assert (GET_MODE (op[opc]) == mode); */
2882 if (!(*insn_data[icode].operand[opc].predicate)
2883 (op[opc], mode))
2884 op[opc] = copy_to_mode_reg (mode, op[opc]);
2885 break;
2887 case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
2888 gcc_assert (opc > 1);
2889 if (CONST_INT_P (op[opc]))
2891 unsigned int nunits
2892 = GET_MODE_NUNITS (builtin_mode).to_constant ();
2893 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
2894 /* Keep to GCC-vector-extension lane indices in the RTL. */
2895 op[opc] = aarch64_endian_lane_rtx (builtin_mode,
2896 INTVAL (op[opc]));
2898 goto constant_arg;
2900 case SIMD_ARG_LANE_INDEX:
2901 /* Must be an index into a previous operand. */
2902 gcc_assert (opc > 0);
2903 if (CONST_INT_P (op[opc]))
2905 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2906 unsigned int nunits
2907 = GET_MODE_NUNITS (vmode).to_constant ();
2908 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
2909 /* Keep to GCC-vector-extension lane indices in the RTL. */
2910 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
2912 /* If the lane index isn't a constant then error out. */
2913 goto constant_arg;
2915 case SIMD_ARG_LANE_PAIR_INDEX:
2916 /* Must be an index into a previous operand; the index is
2917 restricted to nunits / 2. */
2918 gcc_assert (opc > 0);
2919 if (CONST_INT_P (op[opc]))
2921 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2922 unsigned int nunits
2923 = GET_MODE_NUNITS (vmode).to_constant ();
2924 aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
2925 /* Keep to GCC-vector-extension lane indices in the RTL. */
2926 int lane = INTVAL (op[opc]);
2927 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
2928 SImode);
2930 /* If the lane index isn't a constant then error out. */
2931 goto constant_arg;
2932 case SIMD_ARG_LANE_QUADTUP_INDEX:
2933 /* Must be an index into a previous operand; the index is
2934 restricted to nunits / 4. */
2935 gcc_assert (opc > 0);
2936 if (CONST_INT_P (op[opc]))
2938 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2939 unsigned int nunits
2940 = GET_MODE_NUNITS (vmode).to_constant ();
2941 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
2942 /* Keep to GCC-vector-extension lane indices in the RTL. */
2943 int lane = INTVAL (op[opc]);
2944 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
2945 SImode);
2947 /* If the lane index isn't a constant then error out. */
2948 goto constant_arg;
2949 case SIMD_ARG_CONSTANT:
2950 constant_arg:
2951 if (!(*insn_data[icode].operand[opc].predicate)
2952 (op[opc], mode))
2954 error_at (EXPR_LOCATION (exp),
2955 "argument %d must be a constant immediate",
2956 opc + 1 - have_retval);
2957 return const0_rtx;
2959 break;
2961 case SIMD_ARG_STOP:
2962 gcc_unreachable ();
2965 opc++;
2969 switch (opc)
2971 case 1:
2972 pat = GEN_FCN (icode) (op[0]);
2973 break;
2975 case 2:
2976 pat = GEN_FCN (icode) (op[0], op[1]);
2977 break;
2979 case 3:
2980 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
2981 break;
2983 case 4:
2984 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
2985 break;
2987 case 5:
2988 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
2989 break;
2991 case 6:
2992 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
2993 break;
2995 default:
2996 gcc_unreachable ();
2999 if (!pat)
3000 return NULL_RTX;
3002 emit_insn (pat);
3004 return target;
3007 /* Expand an AArch64 AdvSIMD builtin (intrinsic). */
3009 aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
3011 if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
3013 rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
3014 rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
3015 if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
3016 && UINTVAL (elementsize) != 0
3017 && UINTVAL (totalsize) != 0)
3019 rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
3020 if (CONST_INT_P (lane_idx))
3021 aarch64_simd_lane_bounds (lane_idx, 0,
3022 UINTVAL (totalsize)
3023 / UINTVAL (elementsize),
3024 exp);
3025 else
3026 error_at (EXPR_LOCATION (exp),
3027 "lane index must be a constant immediate");
3029 else
3030 error_at (EXPR_LOCATION (exp),
3031 "total size and element size must be a nonzero "
3032 "constant immediate");
3033 /* Don't generate any RTL. */
3034 return const0_rtx;
3036 aarch64_simd_builtin_datum *d =
3037 &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
3038 enum insn_code icode = d->code;
3039 builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
3040 int num_args = insn_data[d->code].n_operands;
3041 int is_void = 0;
3042 int k;
3044 is_void = !!(d->qualifiers[0] & qualifier_void);
3046 num_args += is_void;
3048 for (k = 1; k < num_args; k++)
3050 /* We have four arrays of data, each indexed in a different fashion.
3051 qualifiers - element 0 always describes the function return type.
3052 operands - element 0 is either the operand for return value (if
3053 the function has a non-void return type) or the operand for the
3054 first argument.
3055 expr_args - element 0 always holds the first argument.
3056 args - element 0 is always used for the return type. */
3057 int qualifiers_k = k;
3058 int operands_k = k - is_void;
3059 int expr_args_k = k - 1;
3061 if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
3062 args[k] = SIMD_ARG_LANE_INDEX;
3063 else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
3064 args[k] = SIMD_ARG_LANE_PAIR_INDEX;
3065 else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
3066 args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
3067 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
3068 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
3069 else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
3070 args[k] = SIMD_ARG_CONSTANT;
3071 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
3073 rtx arg
3074 = expand_normal (CALL_EXPR_ARG (exp,
3075 (expr_args_k)));
3076 /* Handle constants only if the predicate allows it. */
3077 bool op_const_int_p =
3078 (CONST_INT_P (arg)
3079 && (*insn_data[icode].operand[operands_k].predicate)
3080 (arg, insn_data[icode].operand[operands_k].mode));
3081 args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
3083 else
3084 args[k] = SIMD_ARG_COPY_TO_REG;
3087 args[k] = SIMD_ARG_STOP;
3089 /* The interface to aarch64_simd_expand_args expects a 0 if
3090 the function is void, and a 1 if it is not. */
3091 return aarch64_simd_expand_args
3092 (target, icode, !is_void, exp, &args[1], d->mode);
3096 aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
3098 rtx pat;
3099 aarch64_crc_builtin_datum *d
3100 = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
3101 enum insn_code icode = d->icode;
3102 tree arg0 = CALL_EXPR_ARG (exp, 0);
3103 tree arg1 = CALL_EXPR_ARG (exp, 1);
3104 rtx op0 = expand_normal (arg0);
3105 rtx op1 = expand_normal (arg1);
3106 machine_mode tmode = insn_data[icode].operand[0].mode;
3107 machine_mode mode0 = insn_data[icode].operand[1].mode;
3108 machine_mode mode1 = insn_data[icode].operand[2].mode;
3110 if (! target
3111 || GET_MODE (target) != tmode
3112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
3113 target = gen_reg_rtx (tmode);
3115 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
3116 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
3118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
3119 op0 = copy_to_mode_reg (mode0, op0);
3120 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
3121 op1 = copy_to_mode_reg (mode1, op1);
3123 pat = GEN_FCN (icode) (target, op0, op1);
3124 if (!pat)
3125 return NULL_RTX;
3127 emit_insn (pat);
3128 return target;
3131 /* Function to expand reciprocal square root builtins. */
3133 static rtx
3134 aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
3136 tree arg0 = CALL_EXPR_ARG (exp, 0);
3137 rtx op0 = expand_normal (arg0);
3139 rtx (*gen) (rtx, rtx);
3141 switch (fcode)
3143 case AARCH64_BUILTIN_RSQRT_DF:
3144 gen = gen_rsqrtdf2;
3145 break;
3146 case AARCH64_BUILTIN_RSQRT_SF:
3147 gen = gen_rsqrtsf2;
3148 break;
3149 case AARCH64_BUILTIN_RSQRT_V2DF:
3150 gen = gen_rsqrtv2df2;
3151 break;
3152 case AARCH64_BUILTIN_RSQRT_V2SF:
3153 gen = gen_rsqrtv2sf2;
3154 break;
3155 case AARCH64_BUILTIN_RSQRT_V4SF:
3156 gen = gen_rsqrtv4sf2;
3157 break;
3158 default: gcc_unreachable ();
3161 if (!target)
3162 target = gen_reg_rtx (GET_MODE (op0));
3164 emit_insn (gen (target, op0));
3166 return target;
3169 /* Expand a FCMLA lane expression EXP with code FCODE and
3170 result going to TARGET if that is convenient. */
3173 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
3175 int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
3176 aarch64_fcmla_laneq_builtin_datum* d
3177 = &aarch64_fcmla_lane_builtin_data[bcode];
3178 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
3179 rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
3180 rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
3181 rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
3182 tree tmp = CALL_EXPR_ARG (exp, 3);
3183 rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
3185 /* Validate that the lane index is a constant. */
3186 if (!CONST_INT_P (lane_idx))
3188 error_at (EXPR_LOCATION (exp),
3189 "argument %d must be a constant immediate", 4);
3190 return const0_rtx;
3193 /* Validate that the index is within the expected range. */
3194 int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
3195 aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
3197 /* Generate the correct register and mode. */
3198 int lane = INTVAL (lane_idx);
3200 if (lane < nunits / 4)
3201 op2 = force_lowpart_subreg (d->mode, op2, quadmode);
3202 else
3204 /* Select the upper 64 bits, either a V2SF or V4HF. This is quite
3205 messy: the operation, although conceptually simple, doesn't have a
3206 simple RTL pattern and seems hard to define with a single pattern.
3207 The generic gen_highpart_mode generates code that isn't
3208 optimal. */
3209 rtx temp1 = gen_reg_rtx (d->mode);
3210 rtx temp2 = gen_reg_rtx (DImode);
3211 temp1 = force_lowpart_subreg (d->mode, op2, quadmode);
3212 temp1 = force_subreg (V2DImode, temp1, d->mode, 0);
3213 if (BYTES_BIG_ENDIAN)
3214 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
3215 else
3216 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
3217 op2 = force_subreg (d->mode, temp2, GET_MODE (temp2), 0);
3219 /* And recalculate the index. */
3220 lane -= nunits / 4;
3223 /* Keep to GCC-vector-extension lane indices in the RTL; only nunits / 4
3224 values (the maximum in the range check) are valid, which means only 0-1,
3225 so we only need to know the order in a V2 mode. */
3226 lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
3228 if (!target
3229 || !REG_P (target)
3230 || GET_MODE (target) != d->mode)
3231 target = gen_reg_rtx (d->mode);
3233 rtx pat = NULL_RTX;
3235 if (d->lane)
3236 pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
3237 else
3238 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
3240 if (!pat)
3241 return NULL_RTX;
3243 emit_insn (pat);
3244 return target;
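/* A minimal usage sketch, assuming an arm_neon.h that provides the
   Armv8.3-A complex-arithmetic (FCMLA) lane intrinsics.  The _laneq forms
   take a 128-bit third operand but use only one complex (even/odd) pair
   from it, which is why the expander above must sometimes move the
   selected pair into the low 64 bits before emitting the instruction.  */
#include <arm_neon.h>

static inline float32x2_t
example_fcmla_laneq (float32x2_t acc, float32x2_t a, float32x4_t b)
{
  /* Accumulate a * pair 1 of b with rotations 0 and 90 degrees.  */
  acc = vcmla_laneq_f32 (acc, a, b, 1);
  acc = vcmla_rot90_laneq_f32 (acc, a, b, 1);
  return acc;
}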
3247 /* Function to expand an expression EXP which calls one of the Transactional
3248 Memory Extension (TME) builtins FCODE with the result going to TARGET. */
3249 static rtx
3250 aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
3252 switch (fcode)
3254 case AARCH64_TME_BUILTIN_TSTART:
3255 target = gen_reg_rtx (DImode);
3256 emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
3257 break;
3259 case AARCH64_TME_BUILTIN_TTEST:
3260 target = gen_reg_rtx (DImode);
3261 emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
3262 break;
3264 case AARCH64_TME_BUILTIN_TCOMMIT:
3265 emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
3266 break;
3268 case AARCH64_TME_BUILTIN_TCANCEL:
3270 tree arg0 = CALL_EXPR_ARG (exp, 0);
3271 rtx op0 = expand_normal (arg0);
3272 if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
3273 emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
3274 else
3276 error_at (EXPR_LOCATION (exp),
3277 "argument must be a 16-bit constant immediate");
3278 return const0_rtx;
3281 break;
3283 default :
3284 gcc_unreachable ();
3286 return target;
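/* A minimal usage sketch of the TME builtins handled above, assuming the
   ACLE intrinsics from arm_acle.h and the +tme extension.  __tstart
   returns zero when the transaction starts and a failure code otherwise;
   the __tcancel immediate must be a 16-bit constant, as checked above.  */
#include <arm_acle.h>

static inline int
example_tme_increment (long *counter)
{
  uint64_t status = __tstart ();
  if (status != 0)
    return -1;			/* Transaction failed or was cancelled.  */
  if (*counter < 0)
    /* Control resumes after __tstart with a nonzero status.  */
    __tcancel (0x42);
  ++*counter;
  __tcommit ();
  return 0;
}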
3289 /* Function to expand an expression EXP which calls one of the Load/Store
3290 64 Byte extension (LS64) builtins FCODE with the result going to TARGET. */
3291 static rtx
3292 aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
3294 expand_operand ops[3];
3296 switch (fcode)
3298 case AARCH64_LS64_BUILTIN_LD64B:
3300 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3301 create_output_operand (&ops[0], target, V8DImode);
3302 create_input_operand (&ops[1], op0, DImode);
3303 expand_insn (CODE_FOR_ld64b, 2, ops);
3304 return ops[0].value;
3306 case AARCH64_LS64_BUILTIN_ST64B:
3308 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3309 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3310 create_input_operand (&ops[0], op0, DImode);
3311 create_input_operand (&ops[1], op1, V8DImode);
3312 expand_insn (CODE_FOR_st64b, 2, ops);
3313 return const0_rtx;
3315 case AARCH64_LS64_BUILTIN_ST64BV:
3317 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3318 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3319 create_output_operand (&ops[0], target, DImode);
3320 create_input_operand (&ops[1], op0, DImode);
3321 create_input_operand (&ops[2], op1, V8DImode);
3322 expand_insn (CODE_FOR_st64bv, 3, ops);
3323 return ops[0].value;
3325 case AARCH64_LS64_BUILTIN_ST64BV0:
3327 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3328 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3329 create_output_operand (&ops[0], target, DImode);
3330 create_input_operand (&ops[1], op0, DImode);
3331 create_input_operand (&ops[2], op1, V8DImode);
3332 expand_insn (CODE_FOR_st64bv0, 3, ops);
3333 return ops[0].value;
3337 gcc_unreachable ();
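/* A minimal usage sketch of the LS64 builtins expanded above, assuming
   the ACLE intrinsics and data512_t type from arm_acle.h (+ls64).  The
   512-bit payload corresponds to the V8DImode operand used above.  */
#include <arm_acle.h>

static inline uint64_t
example_ls64_copy (void *dst, const void *src)
{
  data512_t block = __arm_ld64b (src);	/* Single-copy-atomic 64-byte load.  */
  return __arm_st64bv (dst, block);	/* Store, returning the status value.  */
}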
3340 /* Expand a random number builtin EXP with code FCODE, putting the result
3341 into TARGET. If IGNORE is true the return value is ignored. */
3344 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
3346 rtx pat;
3347 enum insn_code icode;
3348 if (fcode == AARCH64_BUILTIN_RNG_RNDR)
3349 icode = CODE_FOR_aarch64_rndr;
3350 else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
3351 icode = CODE_FOR_aarch64_rndrrs;
3352 else
3353 gcc_unreachable ();
3355 rtx rand = gen_reg_rtx (DImode);
3356 pat = GEN_FCN (icode) (rand);
3357 if (!pat)
3358 return NULL_RTX;
3360 tree arg0 = CALL_EXPR_ARG (exp, 0);
3361 rtx res_addr = expand_normal (arg0);
3362 res_addr = convert_memory_address (Pmode, res_addr);
3363 rtx res_mem = gen_rtx_MEM (DImode, res_addr);
3364 emit_insn (pat);
3365 emit_move_insn (res_mem, rand);
3366 /* If the status result is unused don't generate the CSET code. */
3367 if (ignore)
3368 return target;
3370 rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
3371 rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
3372 emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
3373 return target;
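/* A minimal usage sketch of the RNDR builtins expanded above, assuming
   the ACLE intrinsics from arm_acle.h (+rng).  The intrinsic writes the
   random value through the pointer and returns zero on success; when the
   caller ignores that status, the expander above omits the CSET.  */
#include <arm_acle.h>

static inline uint64_t
example_rndr (void)
{
  uint64_t value;
  while (__rndr (&value) != 0)
    ;				/* Retry until the generator reports success.  */
  return value;
}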
3376 /* Expand the read/write system register builtin EXPs. */
3378 aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
3380 tree arg0, arg1;
3381 rtx const_str, input_val, subreg;
3382 enum machine_mode mode;
3383 enum insn_code icode;
3384 class expand_operand ops[2];
3386 arg0 = CALL_EXPR_ARG (exp, 0);
3388 bool write_op = (fcode == AARCH64_WSR
3389 || fcode == AARCH64_WSRP
3390 || fcode == AARCH64_WSR64
3391 || fcode == AARCH64_WSRF
3392 || fcode == AARCH64_WSRF64
3393 || fcode == AARCH64_WSR128);
3395 bool op128 = (fcode == AARCH64_RSR128 || fcode == AARCH64_WSR128);
3396 enum machine_mode sysreg_mode = op128 ? TImode : DImode;
3398 if (op128 && !TARGET_D128)
3400 error_at (EXPR_LOCATION (exp), "128-bit system register support requires"
3401 " the %<d128%> extension");
3402 return const0_rtx;
3405 /* Argument 0 (system register name) must be a string literal. */
3406 gcc_assert (TREE_CODE (arg0) == ADDR_EXPR
3407 && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
3408 && TREE_CODE (TREE_OPERAND (arg0, 0)) == STRING_CST);
3410 const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
3412 tree len_tree = c_strlen (arg0, 1);
3413 if (len_tree == NULL_TREE)
3415 error_at (EXPR_LOCATION (exp), "invalid system register name provided");
3416 return const0_rtx;
3419 size_t len = TREE_INT_CST_LOW (len_tree);
3420 char *sysreg_name = xstrdup (name_input);
3422 for (unsigned pos = 0; pos <= len; pos++)
3423 sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
3425 const char* name_output = aarch64_retrieve_sysreg ((const char *) sysreg_name,
3426 write_op, op128);
3427 if (name_output == NULL)
3429 error_at (EXPR_LOCATION (exp), "invalid system register name %qs",
3430 sysreg_name);
3431 return const0_rtx;
3434 /* Assign the string corresponding to the system register name to an RTX. */
3435 const_str = rtx_alloc (CONST_STRING);
3436 PUT_CODE (const_str, CONST_STRING);
3437 XSTR (const_str, 0) = ggc_strdup (name_output);
3439 /* Set up expander operands and call instruction expansion. */
3440 if (write_op)
3442 arg1 = CALL_EXPR_ARG (exp, 1);
3443 mode = TYPE_MODE (TREE_TYPE (arg1));
3444 input_val = copy_to_mode_reg (mode, expand_normal (arg1));
3446 icode = (op128 ? CODE_FOR_aarch64_write_sysregti
3447 : CODE_FOR_aarch64_write_sysregdi);
3449 switch (fcode)
3451 case AARCH64_WSR:
3452 case AARCH64_WSRP:
3453 case AARCH64_WSR64:
3454 case AARCH64_WSRF64:
3455 case AARCH64_WSR128:
3456 subreg = force_lowpart_subreg (sysreg_mode, input_val, mode);
3457 break;
3458 case AARCH64_WSRF:
3459 subreg = gen_lowpart_SUBREG (SImode, input_val);
3460 subreg = gen_lowpart_SUBREG (DImode, subreg);
3461 break;
3464 create_fixed_operand (&ops[0], const_str);
3465 create_input_operand (&ops[1], subreg, sysreg_mode);
3466 expand_insn (icode, 2, ops);
3468 return target;
3471 /* Read operations are implied by !write_op. */
3472 gcc_assert (call_expr_nargs (exp) == 1);
3474 icode = (op128 ? CODE_FOR_aarch64_read_sysregti
3475 : CODE_FOR_aarch64_read_sysregdi);
3477 /* Emit the initial read_sysregdi rtx. */
3478 create_output_operand (&ops[0], target, sysreg_mode);
3479 create_fixed_operand (&ops[1], const_str);
3480 expand_insn (icode, 2, ops);
3481 target = ops[0].value;
3483 /* Do any necessary post-processing on the result. */
3484 switch (fcode)
3486 case AARCH64_RSR:
3487 case AARCH64_RSRP:
3488 case AARCH64_RSR64:
3489 case AARCH64_RSRF64:
3490 case AARCH64_RSR128:
3491 return force_lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)),
3492 target, sysreg_mode);
3493 case AARCH64_RSRF:
3494 subreg = gen_lowpart_SUBREG (SImode, target);
3495 return gen_lowpart_SUBREG (SFmode, subreg);
3496 default:
3497 gcc_unreachable ();
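/* A minimal usage sketch of the __arm_rsr and __arm_wsr families handled
   above, assuming the ACLE intrinsics from arm_acle.h.  The register name
   must be a string literal; it is lowercased and validated against the
   sysreg table before being emitted as a CONST_STRING operand.  */
#include <arm_acle.h>

static inline uint64_t
example_swap_tpidr (uint64_t new_value)
{
  uint64_t old_value = __arm_rsr64 ("tpidr_el0");
  __arm_wsr64 ("tpidr_el0", new_value);
  return old_value;
}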
3501 /* Ensure argument ARGNO in EXP represents a const-type argument in the range
3502 [MINVAL, MAXVAL). */
3503 static HOST_WIDE_INT
3504 require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval,
3505 HOST_WIDE_INT maxval)
3507 maxval--;
3508 tree arg = CALL_EXPR_ARG (exp, argno);
3509 if (TREE_CODE (arg) != INTEGER_CST)
3510 error_at (EXPR_LOCATION (exp), "Constant-type argument expected");
3512 auto argval = wi::to_widest (arg);
3514 if (argval < minval || argval > maxval)
3515 error_at (EXPR_LOCATION (exp),
3516 "argument %d must be a constant immediate "
3517 "in range [%wd,%wd]", argno + 1, minval, maxval);
3519 HOST_WIDE_INT retval = argval.to_shwi ();
3520 return retval;
3524 /* Expand a prefetch builtin EXP. */
3525 void
3526 aarch64_expand_prefetch_builtin (tree exp, int fcode)
3528 int kind_id = -1;
3529 int level_id = -1;
3530 int rettn_id = -1;
3531 char prfop[11];
3532 class expand_operand ops[2];
3534 static const char *kind_s[] = {"PLD", "PST", "PLI"};
3535 static const char *level_s[] = {"L1", "L2", "L3", "SLC"};
3536 static const char *rettn_s[] = {"KEEP", "STRM"};
3538 /* Each of the four prefetch builtins takes a different number of arguments,
3539 but proceeds to call the PRFM insn which requires 4 pieces of information
3540 to be fully defined. Where one of these takes fewer than 4 arguments, set
3541 sensible defaults. */
3542 switch (fcode)
3544 case AARCH64_PLDX:
3545 break;
3546 case AARCH64_PLIX:
3547 kind_id = 2;
3548 break;
3549 case AARCH64_PLI:
3550 case AARCH64_PLD:
3551 kind_id = (fcode == AARCH64_PLD) ? 0 : 2;
3552 level_id = 0;
3553 rettn_id = 0;
3554 break;
3555 default:
3556 gcc_unreachable ();
3559 /* Any -1 id variable is to be user-supplied. Here we fill these in and run
3560 bounds checks on them. "PLI" is used only implicitly by AARCH64_PLI &
3561 AARCH64_PLIX, never explicitly. */
3562 int argno = 0;
3563 if (kind_id < 0)
3564 kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s) - 1);
3565 if (level_id < 0)
3566 level_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (level_s));
3567 if (rettn_id < 0)
3568 rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s));
3569 rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode,
3570 EXPAND_NORMAL);
3572 if (seen_error ())
3573 return;
3575 sprintf (prfop, "%s%s%s", kind_s[kind_id],
3576 level_s[level_id],
3577 rettn_s[rettn_id]);
3579 rtx const_str = rtx_alloc (CONST_STRING);
3580 PUT_CODE (const_str, CONST_STRING);
3581 XSTR (const_str, 0) = ggc_strdup (prfop);
3583 create_fixed_operand (&ops[0], const_str);
3584 create_address_operand (&ops[1], address);
3585 maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
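/* A minimal usage sketch of the prefetch builtins handled above, assuming
   the ACLE intrinsics from arm_acle.h.  __pldx takes (access_kind,
   cache_level, retention_policy, address); the three constants index
   kind_s, level_s and rettn_s, so __pldx (1, 2, 1, p) should produce
   roughly "prfm pstl3strm, [p]".  */
#include <arm_acle.h>

static inline void
example_prefetch (const void *p)
{
  __pld (p);			/* PLDL1KEEP: plain data prefetch.  */
  __pldx (1, 2, 1, p);		/* PSTL3STRM: streaming prefetch for store.  */
  __plix (0, 0, p);		/* PLIL1KEEP: instruction prefetch.  */
}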
3588 /* Expand an expression EXP that calls a MEMTAG built-in FCODE
3589 with result going to TARGET. */
3590 static rtx
3591 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
3593 if (TARGET_ILP32)
3595 error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
3596 return const0_rtx;
3599 rtx pat = NULL;
3600 enum insn_code icode = aarch64_memtag_builtin_data[fcode -
3601 AARCH64_MEMTAG_BUILTIN_START - 1].icode;
3603 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3604 machine_mode mode0 = GET_MODE (op0);
3605 op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
3606 op0 = convert_to_mode (DImode, op0, true);
3608 switch (fcode)
3610 case AARCH64_MEMTAG_BUILTIN_IRG:
3611 case AARCH64_MEMTAG_BUILTIN_GMI:
3612 case AARCH64_MEMTAG_BUILTIN_SUBP:
3613 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
3615 if (! target
3616 || GET_MODE (target) != DImode
3617 || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
3618 target = gen_reg_rtx (DImode);
3620 if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
3622 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3624 if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
3626 pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
3627 break;
3629 error_at (EXPR_LOCATION (exp),
3630 "argument %d must be a constant immediate "
3631 "in range [0,15]", 2);
3632 return const0_rtx;
3634 else
3636 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3637 machine_mode mode1 = GET_MODE (op1);
3638 op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
3639 op1 = convert_to_mode (DImode, op1, true);
3640 pat = GEN_FCN (icode) (target, op0, op1);
3642 break;
3644 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
3645 target = op0;
3646 pat = GEN_FCN (icode) (target, op0, const0_rtx);
3647 break;
3648 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
3649 pat = GEN_FCN (icode) (op0, op0, const0_rtx);
3650 break;
3651 default:
3652 gcc_unreachable();
3655 if (!pat)
3656 return NULL_RTX;
3658 emit_insn (pat);
3659 return target;
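/* A minimal usage sketch of the MTE builtins expanded above, assuming the
   ACLE intrinsics from arm_acle.h (+memtag) and a non-ILP32 ABI, as
   checked at the top of the function.  */
#include <arm_acle.h>

static inline void *
example_mte_tag_granule (void *p)
{
  void *tagged = __arm_mte_create_random_tag (p, 0);	/* IRG */
  __arm_mte_set_tag (tagged);				/* STG */
  /* The tag increment must be a constant in [0, 15]; see INC_TAG above.  */
  return __arm_mte_increment_tag (tagged, 1);		/* ADDG */
}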
3662 /* Function to expand an expression EXP which calls one of the ACLE Data
3663 Intrinsic builtins FCODE with the result going to TARGET. */
3664 static rtx
3665 aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
3667 expand_operand ops[2];
3668 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3669 create_output_operand (&ops[0], target, mode);
3670 create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode);
3671 enum insn_code icode;
3673 switch (fcode)
3675 case AARCH64_REV16:
3676 case AARCH64_REV16L:
3677 case AARCH64_REV16LL:
3678 icode = code_for_aarch64_rev16 (mode);
3679 break;
3680 case AARCH64_RBIT:
3681 case AARCH64_RBITL:
3682 case AARCH64_RBITLL:
3683 icode = code_for_aarch64_rbit (mode);
3684 break;
3685 default:
3686 gcc_unreachable ();
3689 expand_insn (icode, 2, ops);
3690 return ops[0].value;
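/* A minimal usage sketch of the ACLE data-processing intrinsics routed
   through this expander, assuming arm_acle.h.  __rev16 swaps the bytes
   within each 16-bit halfword and __rbit reverses the bit order; the
   L/LL variants simply select the wider mode above.  */
#include <arm_acle.h>

static inline uint32_t
example_data_intrinsics (uint32_t x)
{
  uint32_t halfword_swapped = __rev16 (x);	/* 0x11223344 -> 0x22114433 */
  uint64_t bit_reversed = __rbitll (x);		/* Expanded in DImode.  */
  return halfword_swapped ^ (uint32_t) bit_reversed;
}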
3693 /* Convert ptr_mode value OP to a Pmode value (for ILP32). */
3694 static void
3695 aarch64_convert_address (expand_operand *op)
3697 op->value = convert_memory_address (Pmode, op->value);
3698 op->mode = Pmode;
3701 /* Dereference the pointer in OP, turning it into a memory reference to
3702 NELTS instances of MEM_MODE. */
3703 static void
3704 aarch64_dereference_pointer (expand_operand *op, machine_mode mem_mode,
3705 unsigned int nelts = 1)
3707 if (nelts == 1)
3709 op->value = gen_rtx_MEM (mem_mode, op->value);
3710 op->mode = mem_mode;
3712 else
3714 op->value = gen_rtx_MEM (BLKmode, op->value);
3715 op->mode = BLKmode;
3716 set_mem_size (op->value, GET_MODE_SIZE (mem_mode) * nelts);
3720 /* OP contains an integer index into a vector or tuple of mode VEC_MODE.
3721 Convert OP from an architectural lane number to a GCC lane number. */
3722 static void
3723 aarch64_canonicalize_lane (expand_operand *op, machine_mode vec_mode)
3725 auto nunits = aarch64_num_lanes (vec_mode);
3726 op->value = gen_int_mode (ENDIAN_LANE_N (nunits, UINTVAL (op->value)),
3727 SImode);
3730 /* OP contains an integer index into a vector or tuple of mode VEC_MODE.
3731 Convert OP from an architectural lane number to a vec_merge mask. */
3732 static void
3733 aarch64_convert_to_lane_mask (expand_operand *op, machine_mode vec_mode)
3735 auto nunits = aarch64_num_lanes (vec_mode);
3736 create_integer_operand (op, 1 << ENDIAN_LANE_N (nunits, INTVAL (op->value)));
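/* A sketch of the lane arithmetic performed by the two helpers above,
   assuming ENDIAN_LANE_N maps lane N to NUNITS - 1 - N on big-endian
   targets and to N otherwise.  The vec_merge mask is simply a one-hot
   encoding of the resulting GCC lane number.  */
static inline unsigned int
example_endian_lane (unsigned int nunits, unsigned int arch_lane,
		     int big_endian)
{
  return big_endian ? nunits - 1 - arch_lane : arch_lane;
}

static inline unsigned int
example_lane_mask (unsigned int nunits, unsigned int arch_lane,
		   int big_endian)
{
  /* E.g. architectural lane 2 of a 4-lane vector on big-endian is GCC
     lane 1, giving a vec_merge mask of 1 << 1 == 2.  */
  return 1u << example_endian_lane (nunits, arch_lane, big_endian);
}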
3739 /* If OP is a 128-bit vector, convert it to the equivalent 64-bit vector.
3740 Do nothing otherwise. */
3741 static void
3742 aarch64_convert_to_v64 (expand_operand *op)
3744 if (known_eq (GET_MODE_BITSIZE (op->mode), 128u))
3746 op->mode = aarch64_v64_mode (GET_MODE_INNER (op->mode)).require ();
3747 op->value = gen_lowpart (op->mode, op->value);
3751 /* If OP is a 64-bit (half-register) vector or a structure of 64-bit vectors,
3752 pack its contents into the smallest associated full-register mode,
3753 padding with zeros if necessary. Return true if padding was used. */
3754 static bool
3755 aarch64_pack_into_v128s (expand_operand *op)
3757 bool padded = false;
3758 unsigned int nregs = targetm.hard_regno_nregs (V0_REGNUM, op->mode);
3760 /* Do nothing if the operand is already a full-register mode. */
3761 if (known_eq (nregs * UNITS_PER_VREG, GET_MODE_SIZE (op->mode)))
3762 return padded;
3764 auto elt_mode = GET_MODE_INNER (op->mode);
3765 auto v64_mode = aarch64_v64_mode (elt_mode).require ();
3766 auto v128_mode = aarch64_v128_mode (elt_mode).require ();
3768 auto new_mode = v128_mode;
3769 if (nregs > 2)
3770 new_mode = aarch64_advsimd_vector_array_mode (v128_mode, CEIL (nregs, 2))
3771 .require ();
3773 /* Get enough V64_MODE inputs to fill NEW_MODE, which is made up of a
3774 whole number of V128_MODEs. */
3775 auto_vec<rtx, 4> inputs;
3776 for (unsigned int i = 0; i < nregs; ++i)
3778 rtx input = simplify_gen_subreg (v64_mode, op->value, op->mode,
3779 i * GET_MODE_SIZE (v64_mode));
3780 inputs.quick_push (input);
3782 if (nregs & 1)
3784 inputs.quick_push (CONST0_RTX (v64_mode));
3785 padded = true;
3788 /* Create a NEW_MODE register and build it up from individual V128_MODEs. */
3789 op->mode = new_mode;
3790 op->value = gen_reg_rtx (new_mode);
3791 for (unsigned int i = 0; i < inputs.length (); i += 2)
3793 rtx result = gen_rtx_SUBREG (v128_mode, op->value,
3794 i * GET_MODE_SIZE (v64_mode));
3795 emit_insn (gen_aarch64_combine (v64_mode, result,
3796 inputs[i], inputs[i + 1]));
3798 return padded;
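/* A sketch of what the packing above achieves for the legacy 64-bit
   table forms, written with arm_neon.h intrinsics (an assumption for
   illustration, not the code GCC emits).  vtbl2 over a pair of 64-bit
   vectors behaves like a single TBL over their 128-bit concatenation,
   with out-of-range indices yielding zero.  */
#include <arm_neon.h>

static inline uint8x8_t
example_vtbl2_equiv (uint8x8x2_t table, uint8x8_t idx)
{
  uint8x16_t packed = vcombine_u8 (table.val[0], table.val[1]);
  return vqtbl1_u8 (packed, idx);	/* Same result as vtbl2_u8 (table, idx).  */
}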
3801 /* UNSPEC is a high unspec, indicated by "2" in mnemonics and "_high" in
3802 intrinsic names. Return the equivalent low unspec. */
3803 static int
3804 aarch64_get_low_unspec (int unspec)
3806 switch (unspec)
3808 case UNSPEC_FCVTN2_FP8:
3809 return UNSPEC_FCVTN_FP8;
3810 case UNSPEC_F1CVTL2_FP8:
3811 return UNSPEC_F1CVTL_FP8;
3812 case UNSPEC_F2CVTL2_FP8:
3813 return UNSPEC_F2CVTL_FP8;
3814 default:
3815 gcc_unreachable ();
3819 /* OPS contains the operands for one of the permute pair functions vtrn,
3820 vuzp or vzip. Expand the call, given that PERMUTE1 is the unspec for
3821 the first permute and PERMUTE2 is the unspec for the second permute. */
3822 static rtx
3823 aarch64_expand_permute_pair (vec<expand_operand> &ops, int permute1,
3824 int permute2)
3826 rtx op0 = force_reg (ops[1].mode, ops[1].value);
3827 rtx op1 = force_reg (ops[2].mode, ops[2].value);
3828 rtx target = gen_reg_rtx (ops[0].mode);
3829 rtx target0 = gen_rtx_SUBREG (ops[1].mode, target, 0);
3830 rtx target1 = gen_rtx_SUBREG (ops[1].mode, target,
3831 GET_MODE_SIZE (ops[1].mode));
3832 emit_insn (gen_aarch64 (permute1, ops[1].mode, target0, op0, op1));
3833 emit_insn (gen_aarch64 (permute2, ops[1].mode, target1, op0, op1));
3834 return target;
3837 /* Emit a TBL or TBX instruction with inputs INPUTS and a result of mode
3838 MODE. Return the result of the instruction.
3840 UNSPEC is either UNSPEC_TBL or UNSPEC_TBX. The inputs must already be in
3841 registers. */
3842 static rtx
3843 aarch64_expand_tbl_tbx (vec<rtx> &inputs, int unspec, machine_mode mode)
3845 rtx result = gen_reg_rtx (mode);
3846 rtvec vec = gen_rtvec_v (inputs.length (), inputs.address ());
3847 emit_insn (gen_rtx_SET (result, gen_rtx_UNSPEC (mode, vec, unspec)));
3848 return result;
3851 /* Emit a TBL or TBX intrinsic with the operands given by OPS. Return the
3852 result of the intrinsic.
3854 UNSPEC is either UNSPEC_TBL or UNSPEC_TBX. */
3855 static rtx
3856 aarch64_expand_tbl_tbx (vec<expand_operand> &ops, int unspec)
3858 for (unsigned int i = 1; i < ops.length (); ++i)
3859 ops[i].value = force_reg (ops[i].mode, ops[i].value);
3861 /* Handle the legacy forms for which the table is composed of 64-bit
3862 rather than 128-bit vectors. */
3863 auto &table = ops[ops.length () - 2];
3864 auto table_nelts = GET_MODE_NUNITS (table.mode);
3865 bool padded = aarch64_pack_into_v128s (&table);
3867 /* Packing to 128-bit vectors is enough for everything except the 64-bit
3868 forms of vtbx1 and vtbx3, where we need to handle the zero padding. */
3869 if (unspec == UNSPEC_TBL || !padded)
3871 auto_vec<rtx, 3> inputs;
3872 for (unsigned int i = 1; i < ops.length (); ++i)
3873 inputs.quick_push (ops[i].value);
3874 return aarch64_expand_tbl_tbx (inputs, unspec, ops[0].mode);
3877 /* Generate a TBL, which will give the right results for indices that
3878 are less than TABLE_NELTS. */
3879 auto_vec<rtx, 2> inputs;
3880 for (unsigned int i = 2; i < ops.length (); ++i)
3881 inputs.quick_push (ops[i].value);
3882 rtx tbl_result = aarch64_expand_tbl_tbx (inputs, UNSPEC_TBL, ops[0].mode);
3884 /* Get a mask of the indices that are less than TABLE_NELTS. */
3885 auto &indices = ops.last ();
3886 rtx cmp_result = gen_reg_rtx (indices.mode);
3887 rtx bound_rtx = gen_int_mode (table_nelts, GET_MODE_INNER (indices.mode));
3888 rtx bound_vec_rtx = gen_const_vec_duplicate (indices.mode, bound_rtx);
3889 emit_insn (gen_aarch64_cm (GTU, indices.mode, cmp_result,
3890 force_reg (indices.mode, bound_vec_rtx),
3891 indices.value));
3893 /* Select from the TBL result if the index is less than TABLE_NELTS
3894 and from OPS[1] otherwise. */
3895 rtx result = gen_reg_rtx (ops[0].mode);
3896 auto icode = get_vcond_mask_icode (ops[0].mode, indices.mode);
3897 emit_insn (GEN_FCN (icode) (result, tbl_result, ops[1].value, cmp_result));
3898 return result;
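/* A scalar reference model of vtbx1, the case that needs the TBL plus
   compare-and-select sequence above: indices below the real table size
   select from the table, while all other indices keep the fallback
   value, including indices that a zero-padded TBL would map to zero.  */
static inline void
example_vtbx1_ref (unsigned char *result, const unsigned char *fallback,
		   const unsigned char *table, const unsigned char *idx)
{
  for (int i = 0; i < 8; ++i)
    result[i] = idx[i] < 8 ? table[idx[i]] : fallback[i];
}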
3901 /* Expand CALL_EXPR EXP, given that it is a call to the function described
3902 by BUILTIN_DATA, and return the function's return value. Put the result
3903 in TARGET if convenient. */
3904 static rtx
3905 aarch64_expand_pragma_builtin (tree exp, rtx target,
3906 const aarch64_pragma_builtins_data &builtin_data)
3908 unsigned int nargs = call_expr_nargs (exp);
3909 bool returns_void = VOID_TYPE_P (TREE_TYPE (exp));
3911 auto_vec<expand_operand, 8> ops;
3912 if (!returns_void)
3913 create_output_operand (ops.safe_push ({}), target,
3914 TYPE_MODE (TREE_TYPE (exp)));
3915 for (unsigned int i = 0; i < nargs; ++i)
3917 tree arg = CALL_EXPR_ARG (exp, i);
3918 create_input_operand (ops.safe_push ({}), expand_normal (arg),
3919 TYPE_MODE (TREE_TYPE (arg)));
3920 if (POINTER_TYPE_P (TREE_TYPE (arg)))
3921 aarch64_convert_address (&ops.last ());
3924 if (builtin_data.flags & FLAG_USES_FPMR)
3926 auto fpm_input = ops.pop ().value;
3927 auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
3928 emit_move_insn (fpmr, fpm_input);
3931 switch (builtin_data.unspec)
3933 case UNSPEC_F1CVTL_FP8:
3934 case UNSPEC_F2CVTL_FP8:
3935 /* Convert _low forms (which take 128-bit vectors) to the base
3936 64-bit forms. */
3937 aarch64_convert_to_v64 (&ops[1]);
3938 break;
3940 case UNSPEC_LUTI2:
3941 /* LUTI2 treats the first argument as a vector of 4 elements. The forms
3942 with 128-bit inputs are only provided as a convenience; the upper
3943 halves don't actually matter. */
3944 aarch64_convert_to_v64 (&ops[1]);
3945 break;
3948 insn_code icode;
3949 switch (builtin_data.unspec)
3951 case UNSPEC_BSL:
3952 icode = code_for_aarch64_simd_bsl (ops[0].mode);
3953 break;
3955 case UNSPEC_COMBINE:
3956 icode = code_for_aarch64_combine (ops[1].mode);
3957 break;
3959 case UNSPEC_DUP:
3960 if (builtin_data.signature == aarch64_builtin_signatures::load)
3961 aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode));
3962 return expand_vector_broadcast (ops[0].mode, ops[1].value);
3964 case UNSPEC_DUP_LANE:
3965 aarch64_canonicalize_lane (&ops[2], ops[1].mode);
3966 if (ops[0].mode == ops[1].mode)
3967 icode = code_for_aarch64_dup_lane (ops[0].mode);
3968 else
3969 icode = code_for_aarch64_dup_lane (ops[0].mode, ops[0].mode);
3970 break;
3972 case UNSPEC_EXT:
3973 icode = code_for_aarch64_ext (ops[0].mode);
3974 break;
3976 case UNSPEC_FAMAX:
3977 case UNSPEC_FAMIN:
3978 case UNSPEC_F1CVTL_FP8:
3979 case UNSPEC_F2CVTL_FP8:
3980 case UNSPEC_FDOT_FP8:
3981 case UNSPEC_FSCALE:
3982 case UNSPEC_TRN1:
3983 case UNSPEC_TRN2:
3984 case UNSPEC_UZP1:
3985 case UNSPEC_UZP2:
3986 case UNSPEC_ZIP1:
3987 case UNSPEC_ZIP2:
3988 icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
3989 break;
3991 case UNSPEC_F1CVTL2_FP8:
3992 case UNSPEC_F2CVTL2_FP8:
3994 /* Add a high-part selector for the vec_merge. */
3995 auto src_mode = ops.last ().mode;
3996 auto nunits = GET_MODE_NUNITS (src_mode).to_constant ();
3997 rtx par = aarch64_simd_vect_par_cnst_half (src_mode, nunits, true);
3998 create_fixed_operand (ops.safe_push ({}), par);
4000 auto unspec = aarch64_get_low_unspec (builtin_data.unspec);
4001 icode = code_for_aarch64_high (unspec, ops[0].mode);
4002 break;
4005 case UNSPEC_FCVTN_FP8:
4006 icode = code_for_aarch64 (builtin_data.unspec, ops[1].mode);
4007 break;
4009 case UNSPEC_FCVTN2_FP8:
4011 auto unspec = aarch64_get_low_unspec (builtin_data.unspec);
4012 auto mode = ops.last ().mode;
4013 if (BYTES_BIG_ENDIAN)
4014 icode = code_for_aarch64_high_be (unspec, mode);
4015 else
4016 icode = code_for_aarch64_high_le (unspec, mode);
4017 break;
4020 case UNSPEC_FDOT_LANE_FP8:
4021 /* This pattern does not canonicalize the lane number. */
4022 icode = code_for_aarch64_lane (builtin_data.unspec,
4023 ops[0].mode, ops[3].mode);
4024 break;
4026 case UNSPEC_FMLALB_FP8:
4027 case UNSPEC_FMLALT_FP8:
4028 case UNSPEC_FMLALLBB_FP8:
4029 case UNSPEC_FMLALLBT_FP8:
4030 case UNSPEC_FMLALLTB_FP8:
4031 case UNSPEC_FMLALLTT_FP8:
4032 if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
4034 aarch64_canonicalize_lane (&ops[4], ops[3].mode);
4035 icode = code_for_aarch64_lane (builtin_data.unspec,
4036 ops[0].mode, ops[3].mode);
4038 else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
4039 icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
4040 else
4041 gcc_unreachable ();
4042 break;
4044 case UNSPEC_GET_LANE:
4045 aarch64_canonicalize_lane (&ops[2], ops[1].mode);
4046 icode = code_for_aarch64_get_lane (ops[1].mode);
4047 break;
4049 case UNSPEC_LD1:
4050 icode = code_for_aarch64_ld1 (ops[0].mode);
4051 break;
4053 case UNSPEC_LD1x2:
4054 icode = code_for_aarch64_ld1x2 (ops[0].mode);
4055 break;
4057 case UNSPEC_LD1x3:
4058 icode = code_for_aarch64_ld1x3 (ops[0].mode);
4059 break;
4061 case UNSPEC_LD1x4:
4062 icode = code_for_aarch64_ld1x4 (ops[0].mode);
4063 break;
4065 case UNSPEC_LD2:
4066 case UNSPEC_LD3:
4067 case UNSPEC_LD4:
4068 icode = code_for_aarch64_ld (ops[0].mode, ops[0].mode);
4069 break;
4071 case UNSPEC_LD2_DUP:
4072 aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 2);
4073 icode = code_for_aarch64_simd_ld2r (ops[0].mode);
4074 break;
4076 case UNSPEC_LD3_DUP:
4077 aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 3);
4078 icode = code_for_aarch64_simd_ld3r (ops[0].mode);
4079 break;
4081 case UNSPEC_LD4_DUP:
4082 aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 4);
4083 icode = code_for_aarch64_simd_ld4r (ops[0].mode);
4084 break;
4086 case UNSPEC_LD2_LANE:
4087 case UNSPEC_LD3_LANE:
4088 case UNSPEC_LD4_LANE:
4089 aarch64_canonicalize_lane (&ops[3], ops[2].mode);
4090 icode = code_for_aarch64_ld_lane (ops[0].mode, ops[0].mode);
4091 break;
4093 case UNSPEC_LUTI2:
4094 case UNSPEC_LUTI4:
4095 create_integer_operand (ops.safe_push ({}),
4096 builtin_data.unspec == UNSPEC_LUTI2 ? 2 : 4);
4097 icode = code_for_aarch64_lut (ops[1].mode, ops[2].mode);
4098 break;
4100 case UNSPEC_REV16:
4101 case UNSPEC_REV32:
4102 case UNSPEC_REV64:
4103 icode = code_for_aarch64_rev (builtin_data.unspec, ops[0].mode);
4104 break;
4106 case UNSPEC_SET_LANE:
4107 if (builtin_data.signature == aarch64_builtin_signatures::load_lane)
4108 aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode));
4109 /* The vec_set operand order is: dest, scalar, mask, vector. */
4110 std::swap (ops[2], ops[3]);
4111 aarch64_convert_to_lane_mask (&ops[2], ops[3].mode);
4112 icode = code_for_aarch64_simd_vec_set (ops[0].mode);
4113 break;
4115 case UNSPEC_ST1:
4116 icode = code_for_aarch64_st1 (ops[1].mode);
4117 break;
4119 case UNSPEC_ST1_LANE:
4120 aarch64_dereference_pointer (&ops[0], GET_MODE_INNER (ops[1].mode));
4121 /* Reinterpret ops[0] as an output. */
4122 create_fixed_operand (&ops[0], ops[0].value);
4123 aarch64_canonicalize_lane (&ops[2], ops[1].mode);
4124 icode = code_for_aarch64_get_lane (ops[1].mode);
4125 break;
4127 case UNSPEC_ST1x2:
4128 icode = code_for_aarch64_st1x2 (ops[1].mode);
4129 break;
4131 case UNSPEC_ST1x3:
4132 icode = code_for_aarch64_st1x3 (ops[1].mode);
4133 break;
4135 case UNSPEC_ST1x4:
4136 icode = code_for_aarch64_st1x4 (ops[1].mode);
4137 break;
4139 case UNSPEC_ST2:
4140 case UNSPEC_ST3:
4141 case UNSPEC_ST4:
4142 icode = code_for_aarch64_st (ops[1].mode, ops[1].mode);
4143 break;
4145 case UNSPEC_ST2_LANE:
4146 case UNSPEC_ST3_LANE:
4147 case UNSPEC_ST4_LANE:
4148 aarch64_canonicalize_lane (&ops[2], ops[1].mode);
4149 icode = code_for_aarch64_st_lane (ops[1].mode, ops[1].mode);
4150 break;
4152 case UNSPEC_TBL:
4153 case UNSPEC_TBX:
4154 return aarch64_expand_tbl_tbx (ops, builtin_data.unspec);
4156 case UNSPEC_TRN:
4157 return aarch64_expand_permute_pair (ops, UNSPEC_TRN1, UNSPEC_TRN2);
4159 case UNSPEC_UZP:
4160 return aarch64_expand_permute_pair (ops, UNSPEC_UZP1, UNSPEC_UZP2);
4162 case UNSPEC_VCREATE:
4163 return force_lowpart_subreg (ops[0].mode, ops[1].value, ops[1].mode);
4165 case UNSPEC_VEC_COPY:
4167 aarch64_convert_to_lane_mask (&ops[2], ops[1].mode);
4168 aarch64_canonicalize_lane (&ops[4], ops[3].mode);
4169 if (ops[1].mode == ops[3].mode)
4170 icode = code_for_aarch64_simd_vec_copy_lane (ops[1].mode);
4171 else
4172 icode = code_for_aarch64_simd_vec_copy_lane (ops[1].mode,
4173 ops[1].mode);
4174 break;
4177 case UNSPEC_ZIP:
4178 return aarch64_expand_permute_pair (ops, UNSPEC_ZIP1, UNSPEC_ZIP2);
4180 default:
4181 gcc_unreachable ();
4184 expand_insn (icode, ops.length (), ops.address ());
4185 return ops[0].value;
4188 /* Expand an expression EXP as fpsr or fpcr setter (depending on
4189 UNSPEC) using MODE. */
4190 static void
4191 aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
4193 tree arg = CALL_EXPR_ARG (exp, 0);
4194 rtx op = force_reg (mode, expand_normal (arg));
4195 emit_insn (gen_aarch64_set (unspec, mode, op));
4198 /* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE.
4199 Return the target. */
4200 static rtx
4201 aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
4202 rtx target)
4204 expand_operand op;
4205 create_output_operand (&op, target, mode);
4206 expand_insn (icode, 1, &op);
4207 return op.value;
4210 /* Expand GCS builtin EXP with code FCODE, putting the result
4211 into TARGET. If IGNORE is true the return value is ignored. */
4214 aarch64_expand_gcs_builtin (tree exp, rtx target, int fcode, int ignore)
4216 if (fcode == AARCH64_BUILTIN_GCSPR)
4218 expand_operand op;
4219 create_output_operand (&op, target, DImode);
4220 expand_insn (CODE_FOR_aarch64_load_gcspr, 1, &op);
4221 return op.value;
4223 if (fcode == AARCH64_BUILTIN_GCSPOPM && ignore)
4225 expand_insn (CODE_FOR_aarch64_gcspopm_xzr, 0, 0);
4226 return target;
4228 if (fcode == AARCH64_BUILTIN_GCSPOPM)
4230 expand_operand ops[2];
4231 create_output_operand (&ops[0], target, DImode);
4232 create_input_operand (&ops[1], const0_rtx, DImode);
4233 expand_insn (CODE_FOR_aarch64_gcspopm, 2, ops);
4234 return gen_lowpart (ptr_mode, ops[0].value);
4236 if (fcode == AARCH64_BUILTIN_GCSSS)
4238 expand_operand opnd;
4239 rtx arg = expand_normal (CALL_EXPR_ARG (exp, 0));
4240 arg = convert_modes (DImode, ptr_mode, arg, true);
4241 create_input_operand (&opnd, arg, DImode);
4242 expand_insn (CODE_FOR_aarch64_gcsss1, 1, &opnd);
4243 expand_operand ops[2];
4244 create_output_operand (&ops[0], target, DImode);
4245 create_input_operand (&ops[1], const0_rtx, DImode);
4246 expand_insn (CODE_FOR_aarch64_gcsss2, 2, ops);
4247 return gen_lowpart (ptr_mode, ops[0].value);
4249 gcc_unreachable ();
4252 /* Expand an expression EXP that calls built-in function FCODE,
4253 with result going to TARGET if that's convenient. IGNORE is true
4254 if the result of the builtin is ignored. */
4256 aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
4257 int ignore)
4259 int icode;
4260 rtx op0;
4261 tree arg0;
4263 switch (fcode)
4265 case AARCH64_BUILTIN_GET_FPCR:
4266 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
4267 SImode, target);
4268 case AARCH64_BUILTIN_SET_FPCR:
4269 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
4270 return target;
4271 case AARCH64_BUILTIN_GET_FPSR:
4272 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
4273 SImode, target);
4274 case AARCH64_BUILTIN_SET_FPSR:
4275 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
4276 return target;
4277 case AARCH64_BUILTIN_GET_FPCR64:
4278 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
4279 DImode, target);
4280 case AARCH64_BUILTIN_SET_FPCR64:
4281 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
4282 return target;
4283 case AARCH64_BUILTIN_GET_FPSR64:
4284 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
4285 DImode, target);
4286 case AARCH64_BUILTIN_SET_FPSR64:
4287 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
4288 return target;
4289 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
4290 case AARCH64_PAUTH_BUILTIN_PACIA1716:
4291 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
4292 case AARCH64_PAUTH_BUILTIN_PACIB1716:
4293 case AARCH64_PAUTH_BUILTIN_XPACLRI:
4294 arg0 = CALL_EXPR_ARG (exp, 0);
4295 op0 = force_reg (Pmode, expand_normal (arg0));
4297 if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
4299 rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
4300 icode = CODE_FOR_xpaclri;
4301 emit_move_insn (lr, op0);
4302 emit_insn (GEN_FCN (icode) ());
4303 return lr;
4305 else
4307 tree arg1 = CALL_EXPR_ARG (exp, 1);
4308 rtx op1 = force_reg (Pmode, expand_normal (arg1));
4309 switch (fcode)
4311 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
4312 icode = CODE_FOR_autia1716;
4313 break;
4314 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
4315 icode = CODE_FOR_autib1716;
4316 break;
4317 case AARCH64_PAUTH_BUILTIN_PACIA1716:
4318 icode = CODE_FOR_pacia1716;
4319 break;
4320 case AARCH64_PAUTH_BUILTIN_PACIB1716:
4321 icode = CODE_FOR_pacib1716;
4322 break;
4323 default:
4324 icode = 0;
4325 gcc_unreachable ();
4328 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
4329 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
4330 emit_move_insn (x17_reg, op0);
4331 emit_move_insn (x16_reg, op1);
4332 emit_insn (GEN_FCN (icode) ());
4333 return x17_reg;
4336 case AARCH64_JSCVT:
4338 expand_operand ops[2];
4339 create_output_operand (&ops[0], target, SImode);
4340 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
4341 create_input_operand (&ops[1], op0, DFmode);
4342 expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
4343 return ops[0].value;
4346 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
4347 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
4348 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
4349 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
4350 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
4351 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
4352 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
4353 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
4354 return aarch64_expand_fcmla_builtin (exp, target, fcode);
4355 case AARCH64_BUILTIN_RNG_RNDR:
4356 case AARCH64_BUILTIN_RNG_RNDRRS:
4357 return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
4358 case AARCH64_RSR:
4359 case AARCH64_RSRP:
4360 case AARCH64_RSR64:
4361 case AARCH64_RSRF:
4362 case AARCH64_RSRF64:
4363 case AARCH64_RSR128:
4364 case AARCH64_WSR:
4365 case AARCH64_WSRP:
4366 case AARCH64_WSR64:
4367 case AARCH64_WSRF:
4368 case AARCH64_WSRF64:
4369 case AARCH64_WSR128:
4370 return aarch64_expand_rwsr_builtin (exp, target, fcode);
4371 case AARCH64_PLD:
4372 case AARCH64_PLDX:
4373 case AARCH64_PLI:
4374 case AARCH64_PLIX:
4375 aarch64_expand_prefetch_builtin (exp, fcode);
4376 return target;
4378 case AARCH64_BUILTIN_CHKFEAT:
4380 rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
4381 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
4382 emit_move_insn (x16_reg, op0);
4383 expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0);
4384 return copy_to_reg (x16_reg);
4387 case AARCH64_BUILTIN_GCSPR:
4388 case AARCH64_BUILTIN_GCSPOPM:
4389 case AARCH64_BUILTIN_GCSSS:
4390 return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);
4393 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
4394 return aarch64_simd_expand_builtin (fcode, exp, target);
4395 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
4396 return aarch64_crc32_expand_builtin (fcode, exp, target);
4398 if (fcode == AARCH64_BUILTIN_RSQRT_DF
4399 || fcode == AARCH64_BUILTIN_RSQRT_SF
4400 || fcode == AARCH64_BUILTIN_RSQRT_V2DF
4401 || fcode == AARCH64_BUILTIN_RSQRT_V2SF
4402 || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
4403 return aarch64_expand_builtin_rsqrt (fcode, exp, target);
4405 if (fcode == AARCH64_TME_BUILTIN_TSTART
4406 || fcode == AARCH64_TME_BUILTIN_TCOMMIT
4407 || fcode == AARCH64_TME_BUILTIN_TTEST
4408 || fcode == AARCH64_TME_BUILTIN_TCANCEL)
4409 return aarch64_expand_builtin_tme (fcode, exp, target);
4411 if (fcode == AARCH64_LS64_BUILTIN_LD64B
4412 || fcode == AARCH64_LS64_BUILTIN_ST64B
4413 || fcode == AARCH64_LS64_BUILTIN_ST64BV
4414 || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
4415 return aarch64_expand_builtin_ls64 (fcode, exp, target);
4417 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
4418 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
4419 return aarch64_expand_builtin_memtag (fcode, exp, target);
4420 if (fcode >= AARCH64_REV16
4421 && fcode <= AARCH64_RBITLL)
4422 return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
4424 if (auto builtin_data = aarch64_get_pragma_builtin (fcode))
4425 return aarch64_expand_pragma_builtin (exp, target, *builtin_data);
4427 gcc_unreachable ();
4430 /* Return builtin for reciprocal square root. */
4432 tree
4433 aarch64_general_builtin_rsqrt (unsigned int fn)
4435 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
4436 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
4437 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
4438 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
4439 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
4440 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
4441 return NULL_TREE;
4444 /* Return true if the lane check can be removed, as no error is
4445 going to be emitted. */
4446 static bool
4447 aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
4449 if (TREE_CODE (arg0) != INTEGER_CST)
4450 return false;
4451 if (TREE_CODE (arg1) != INTEGER_CST)
4452 return false;
4453 if (TREE_CODE (arg2) != INTEGER_CST)
4454 return false;
4456 auto totalsize = wi::to_widest (arg0);
4457 auto elementsize = wi::to_widest (arg1);
4458 if (totalsize == 0 || elementsize == 0)
4459 return false;
4460 auto lane = wi::to_widest (arg2);
4461 auto high = wi::udiv_trunc (totalsize, elementsize);
4462 return wi::ltu_p (lane, high);
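/* A sketch of the arithmetic behind the check above.  With a 128-bit
   vector of 32-bit elements there are 128 / 32 = 4 lanes, so lanes 0-3
   allow the lane-check call to be folded away and anything else keeps
   the call (and its eventual error).  */
static inline int
example_lane_in_range (unsigned int total_bits, unsigned int elt_bits,
		       unsigned int lane)
{
  if (total_bits == 0 || elt_bits == 0)
    return 0;
  return lane < total_bits / elt_bits;	/* E.g. 3 < 128 / 32, so lane 3 is OK.  */
}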
4465 #undef VAR1
4466 #define VAR1(T, N, MAP, FLAG, A) \
4467 case AARCH64_SIMD_BUILTIN_##T##_##N##A:
4469 #undef VREINTERPRET_BUILTIN
4470 #define VREINTERPRET_BUILTIN(A, B, L) \
4471 case AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B:
4473 #undef VGET_LOW_BUILTIN
4474 #define VGET_LOW_BUILTIN(A) \
4475 case AARCH64_SIMD_BUILTIN_VGET_LOW_##A:
4477 #undef VGET_HIGH_BUILTIN
4478 #define VGET_HIGH_BUILTIN(A) \
4479 case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A:
4481 /* Try to fold a call to the built-in function with subcode FCODE. The
4482 function is passed the N_ARGS arguments in ARGS and it returns a value
4483 of type TYPE. Return the new expression on success and NULL_TREE on
4484 failure. */
4485 tree
4486 aarch64_general_fold_builtin (unsigned int fcode, tree type,
4487 unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
4489 switch (fcode)
4491 BUILTIN_VDQF (UNOP, abs, 2, ALL)
4492 return fold_build1 (ABS_EXPR, type, args[0]);
4493 VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
4494 VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
4495 VAR1 (UNOP, floatv2di, 2, ALL, v2df)
4496 return fold_build1 (FLOAT_EXPR, type, args[0]);
4497 AARCH64_SIMD_VREINTERPRET_BUILTINS
4498 return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
4499 AARCH64_SIMD_VGET_LOW_BUILTINS
4501 auto pos = BYTES_BIG_ENDIAN ? 64 : 0;
4503 return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
4504 bitsize_int (pos));
4506 AARCH64_SIMD_VGET_HIGH_BUILTINS
4508 auto pos = BYTES_BIG_ENDIAN ? 0 : 64;
4510 return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
4511 bitsize_int (pos));
4513 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
4514 gcc_assert (n_args == 3);
4515 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
4516 return void_node;
4517 break;
4518 default:
4519 break;
4522 return NULL_TREE;
4525 enum aarch64_simd_type
4526 get_mem_type_for_load_store (unsigned int fcode)
4528 switch (fcode)
4530 VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
4531 VAR1 (STORE1, st1, 0, STORE, v8qi)
4532 return Int8x8_t;
4533 VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
4534 VAR1 (STORE1, st1, 0, STORE, v16qi)
4535 return Int8x16_t;
4536 VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
4537 VAR1 (STORE1, st1, 0, STORE, v4hi)
4538 return Int16x4_t;
4539 VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
4540 VAR1 (STORE1, st1, 0, STORE, v8hi)
4541 return Int16x8_t;
4542 VAR1 (LOAD1, ld1, 0, LOAD, v2si)
4543 VAR1 (STORE1, st1, 0, STORE, v2si)
4544 return Int32x2_t;
4545 VAR1 (LOAD1, ld1, 0, LOAD, v4si)
4546 VAR1 (STORE1, st1, 0, STORE, v4si)
4547 return Int32x4_t;
4548 VAR1 (LOAD1, ld1, 0, LOAD, v2di)
4549 VAR1 (STORE1, st1, 0, STORE, v2di)
4550 return Int64x2_t;
4551 VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
4552 VAR1 (STORE1_U, st1, 0, STORE, v8qi)
4553 return Uint8x8_t;
4554 VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
4555 VAR1 (STORE1_U, st1, 0, STORE, v16qi)
4556 return Uint8x16_t;
4557 VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
4558 VAR1 (STORE1_U, st1, 0, STORE, v4hi)
4559 return Uint16x4_t;
4560 VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
4561 VAR1 (STORE1_U, st1, 0, STORE, v8hi)
4562 return Uint16x8_t;
4563 VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
4564 VAR1 (STORE1_U, st1, 0, STORE, v2si)
4565 return Uint32x2_t;
4566 VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
4567 VAR1 (STORE1_U, st1, 0, STORE, v4si)
4568 return Uint32x4_t;
4569 VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
4570 VAR1 (STORE1_U, st1, 0, STORE, v2di)
4571 return Uint64x2_t;
4572 VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
4573 VAR1 (STORE1_P, st1, 0, STORE, v8qi)
4574 return Poly8x8_t;
4575 VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
4576 VAR1 (STORE1_P, st1, 0, STORE, v16qi)
4577 return Poly8x16_t;
4578 VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
4579 VAR1 (STORE1_P, st1, 0, STORE, v4hi)
4580 return Poly16x4_t;
4581 VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
4582 VAR1 (STORE1_P, st1, 0, STORE, v8hi)
4583 return Poly16x8_t;
4584 VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
4585 VAR1 (STORE1_P, st1, 0, STORE, v2di)
4586 return Poly64x2_t;
4587 VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
4588 VAR1 (STORE1, st1, 0, STORE, v4hf)
4589 return Float16x4_t;
4590 VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
4591 VAR1 (STORE1, st1, 0, STORE, v8hf)
4592 return Float16x8_t;
4593 VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
4594 VAR1 (STORE1, st1, 0, STORE, v4bf)
4595 return Bfloat16x4_t;
4596 VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
4597 VAR1 (STORE1, st1, 0, STORE, v8bf)
4598 return Bfloat16x8_t;
4599 VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
4600 VAR1 (STORE1, st1, 0, STORE, v2sf)
4601 return Float32x2_t;
4602 VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
4603 VAR1 (STORE1, st1, 0, STORE, v4sf)
4604 return Float32x4_t;
4605 VAR1 (LOAD1, ld1, 0, LOAD, v2df)
4606 VAR1 (STORE1, st1, 0, STORE, v2df)
4607 return Float64x2_t;
4608 default:
4609 gcc_unreachable ();
4610 break;
4614 /* We've seen a vector load from address ADDR. Record it in
4615 vector_load_decls, if appropriate. */
4616 static void
4617 aarch64_record_vector_load_arg (tree addr)
4619 tree decl = aarch64_vector_load_decl (addr);
4620 if (!decl)
4621 return;
4622 if (!cfun->machine->vector_load_decls)
4623 cfun->machine->vector_load_decls = hash_set<tree>::create_ggc (31);
4624 cfun->machine->vector_load_decls->add (decl);
4627 /* Force VAL into a valid gimple value, creating a new SSA_NAME if
4628 necessary. Insert any new statements before GSI. */
4629 static tree
4630 aarch64_force_gimple_val (gimple_stmt_iterator *gsi, tree val)
4632 if (is_gimple_val (val))
4633 return val;
4635 tree tmp = make_ssa_name (TREE_TYPE (val));
4636 gsi_insert_before_without_update (gsi, gimple_build_assign (tmp, val),
4637 GSI_SAME_STMT);
4638 return tmp;
4641 /* Copy vops from FROM to TO and return TO. */
4642 static gimple *
4643 aarch64_copy_vops (gimple *to, gimple *from)
4645 gimple_set_vuse (to, gimple_vuse (from));
4646 gimple_set_vdef (to, gimple_vdef (from));
4647 return to;
4650 /* Fold STMT (at GSI) to VAL, with SEQ setting up the value of VAL.
4651 Return the replacement statement. */
4652 static gimple *
4653 aarch64_fold_to_val (gcall *stmt, gimple_stmt_iterator *gsi,
4654 gimple *seq, tree val)
4656 auto *assign = gimple_build_assign (gimple_call_lhs (stmt), val);
4657 gimple_seq_add_stmt_without_update (&seq, assign);
4658 gsi_replace_with_seq_vops (gsi, seq);
4659 return assign;
4662 /* Dereference pointer ADDR, giving a memory reference of type TYPE. */
4663 static tree
4664 aarch64_dereference (tree addr, tree type)
4666 tree elt_type = (VECTOR_TYPE_P (type) ? TREE_TYPE (type) : type);
4667 tree elt_ptr_type = build_pointer_type_for_mode (elt_type, VOIDmode, true);
4668 tree zero = build_zero_cst (elt_ptr_type);
4669 /* Use element type alignment. */
4670 tree access_type = build_aligned_type (type, TYPE_ALIGN (elt_type));
4671 return fold_build2 (MEM_REF, access_type, addr, zero);
4674 /* LANE is a lane index into VEC. Return the associated bit index
4675 (counting from the first byte in memory order). */
4676 static tree
4677 aarch64_get_lane_bit_index (tree vec, tree lane)
4679 auto vec_mode = TYPE_MODE (TREE_TYPE (vec));
4680 auto nunits = aarch64_num_lanes (vec_mode);
4681 auto idx = ENDIAN_LANE_N (nunits, tree_to_uhwi (lane));
4682 return bitsize_int (idx * GET_MODE_UNIT_BITSIZE (vec_mode));
4685 /* LANE is a lane index into VEC. Return a BIT_FIELD_REF for the
4686 selected element. */
4687 static tree
4688 aarch64_get_lane (tree vec, tree lane)
4690 auto elt_type = TREE_TYPE (TREE_TYPE (vec));
4691 return fold_build3 (BIT_FIELD_REF, elt_type, vec, TYPE_SIZE (elt_type),
4692 aarch64_get_lane_bit_index (vec, lane));
4695 /* LANE is a lane index into VEC. Return a BIT_INSERT_EXPR that replaces
4696 that index with ELT and stores the result in LHS. */
4697 static gimple *
4698 aarch64_set_lane (tree lhs, tree elt, tree vec, tree lane)
4700 tree bit = aarch64_get_lane_bit_index (vec, lane);
4701 return gimple_build_assign (lhs, BIT_INSERT_EXPR, vec, elt, bit);
4704 /* Fold a call to vcombine. */
4705 static gimple *
4706 aarch64_fold_combine (gcall *stmt)
4708 tree first_part, second_part;
4709 if (BYTES_BIG_ENDIAN)
4711 second_part = gimple_call_arg (stmt, 0);
4712 first_part = gimple_call_arg (stmt, 1);
4714 else
4716 first_part = gimple_call_arg (stmt, 0);
4717 second_part = gimple_call_arg (stmt, 1);
4719 tree ret_type = gimple_call_return_type (stmt);
4720 tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part,
4721 NULL_TREE, second_part);
4722 return gimple_build_assign (gimple_call_lhs (stmt), ctor);
4725 /* Fold a call to vld1, given that it loads something of type TYPE. */
4726 static gimple *
4727 aarch64_fold_load (gcall *stmt, tree type)
4729 /* Punt until after inlining, so that we stand a better chance of
4730 recording something meaningful in vector_load_decls. */
4731 if (!cfun->after_inlining)
4732 return nullptr;
4733 tree addr = gimple_call_arg (stmt, 0);
4734 aarch64_record_vector_load_arg (addr);
4735 if (!BYTES_BIG_ENDIAN)
4737 tree mem = aarch64_dereference (addr, type);
4738 auto *new_stmt = gimple_build_assign (gimple_get_lhs (stmt), mem);
4739 return aarch64_copy_vops (new_stmt, stmt);
4741 return nullptr;
4744 /* Fold a call to vst1, given that it stores something of type TYPE. */
4745 static gimple *
4746 aarch64_fold_store (gcall *stmt, tree type)
4748 tree addr = gimple_call_arg (stmt, 0);
4749 tree data = gimple_call_arg (stmt, 1);
4750 if (!BYTES_BIG_ENDIAN)
4752 tree mem = aarch64_dereference (addr, type);
4753 auto *new_stmt = gimple_build_assign (mem, data);
4754 return aarch64_copy_vops (new_stmt, stmt);
4756 return nullptr;
4759 /* An aarch64_fold_permute callback for vext. SELECTOR is the value of
4760 the final argument. */
4761 static unsigned int
4762 aarch64_ext_index (unsigned int, unsigned int selector, unsigned int i)
4764 return selector + i;
4767 /* An aarch64_fold_permute callback for vrev. SELECTOR is the number
4768 of elements in each reversal group. */
4769 static unsigned int
4770 aarch64_rev_index (unsigned int, unsigned int selector, unsigned int i)
4772 return ROUND_DOWN (i, selector) + (selector - 1) - (i % selector);
4775 /* An aarch64_fold_permute callback for vtrn. SELECTOR is 0 for TRN1
4776 and 1 for TRN2. */
4777 static unsigned int
4778 aarch64_trn_index (unsigned int nelts, unsigned int selector, unsigned int i)
4780 return (i % 2) * nelts + ROUND_DOWN (i, 2) + selector;
4783 /* An aarch64_fold_permute callback for vuzp. SELECTOR is 0 for UZP1
4784 and 1 for UZP2. */
4785 static unsigned int
4786 aarch64_uzp_index (unsigned int, unsigned int selector, unsigned int i)
4788 return i * 2 + selector;
4791 /* An aarch64_fold_permute callback for vzip. SELECTOR is 0 for ZIP1
4792 and 1 for ZIP2. */
4793 static unsigned int
4794 aarch64_zip_index (unsigned int nelts, unsigned int selector, unsigned int i)
4796 return (i % 2) * nelts + (i / 2) + selector * (nelts / 2);
4799 /* Fold STMT to a VEC_PERM_EXPR on the first NINPUTS arguments.
4800 Make the VEC_PERM_EXPR emulate an NINPUTS-input TBL in which
4801 architectural lane I of the result selects architectural lane:
4803 GET_INDEX (NELTS, SELECTOR, I)
4805 of the input table. NELTS is the number of elements in one vector. */
4806 static gimple *
4807 aarch64_fold_permute (gcall *stmt, unsigned int ninputs,
4808 unsigned int (*get_index) (unsigned int, unsigned int,
4809 unsigned int),
4810 unsigned int selector)
4812 tree op0 = gimple_call_arg (stmt, 0);
4813 tree op1 = ninputs == 2 ? gimple_call_arg (stmt, 1) : op0;
4814 auto nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)).to_constant ();
4815 vec_perm_builder sel (nelts, nelts, 1);
4816 for (unsigned int i = 0; i < nelts; ++i)
4818 unsigned int index = get_index (nelts, selector,
4819 ENDIAN_LANE_N (nelts, i));
4820 unsigned int vec = index / nelts;
4821 unsigned int elt = ENDIAN_LANE_N (nelts, index % nelts);
4822 sel.quick_push (vec * nelts + elt);
4825 vec_perm_indices indices (sel, ninputs, nelts);
4826 tree mask_type = build_vector_type (ssizetype, nelts);
4827 tree mask = vec_perm_indices_to_tree (mask_type, indices);
4828 return gimple_build_assign (gimple_call_lhs (stmt), VEC_PERM_EXPR,
4829 op0, op1, mask);
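/* A sketch of the index formulas used by the callbacks above, evaluated
   for nelts == 4: aarch64_trn_index gives {0,4,2,6} (TRN1) and {1,5,3,7}
   (TRN2), aarch64_uzp_index gives {0,2,4,6} and {1,3,5,7}, and
   aarch64_zip_index gives {0,4,1,5} and {2,6,3,7}.  */
static inline void
example_permute_indices (unsigned int nelts, unsigned int selector,
			 unsigned int *trn, unsigned int *uzp,
			 unsigned int *zip)
{
  for (unsigned int i = 0; i < nelts; ++i)
    {
      trn[i] = (i % 2) * nelts + (i / 2) * 2 + selector;
      uzp[i] = i * 2 + selector;
      zip[i] = (i % 2) * nelts + (i / 2) + selector * (nelts / 2);
    }
}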
4832 /* Try to fold STMT (at GSI), given that it is a call to the builtin
4833 described by BUILTIN_DATA. Return the new statement on success,
4834 otherwise return null. */
4835 static gimple *
4836 aarch64_gimple_fold_pragma_builtin
4837 (gcall *stmt, gimple_stmt_iterator *gsi,
4838 const aarch64_pragma_builtins_data &builtin_data)
4840 auto &types = builtin_data.types;
4842 switch (builtin_data.unspec)
4844 case UNSPEC_COMBINE:
4845 return aarch64_fold_combine (stmt);
4847 case UNSPEC_DUP:
4848 case UNSPEC_DUP_LANE:
4850 tree arg = gimple_call_arg (stmt, 0);
4851 tree type = types[0].type ();
4852 if (builtin_data.signature == aarch64_builtin_signatures::load)
4853 arg = aarch64_dereference (arg, TREE_TYPE (type));
4854 else if (builtin_data.unspec == UNSPEC_DUP_LANE)
4855 arg = aarch64_get_lane (arg, gimple_call_arg (stmt, 1));
4856 arg = aarch64_force_gimple_val (gsi, arg);
4858 tree dup = build_vector_from_val (type, arg);
4859 return aarch64_fold_to_val (stmt, gsi, nullptr, dup);
4862 case UNSPEC_EXT:
4864 auto index = tree_to_uhwi (gimple_call_arg (stmt, 2));
4865 return aarch64_fold_permute (stmt, 2, aarch64_ext_index, index);
4868 case UNSPEC_GET_LANE:
4870 tree val = aarch64_get_lane (gimple_call_arg (stmt, 0),
4871 gimple_call_arg (stmt, 1));
4872 return gimple_build_assign (gimple_call_lhs (stmt), val);
4875 case UNSPEC_LD1:
4876 return aarch64_fold_load (stmt, types[0].type ());
4878 case UNSPEC_REV16:
4880 auto selector = 16 / GET_MODE_UNIT_BITSIZE (types[0].mode);
4881 return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector);
4884 case UNSPEC_REV32:
4886 auto selector = 32 / GET_MODE_UNIT_BITSIZE (types[0].mode);
4887 return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector);
4890 case UNSPEC_REV64:
4892 auto selector = 64 / GET_MODE_UNIT_BITSIZE (types[0].mode);
4893 return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector);
4896 case UNSPEC_SET_LANE:
4898 tree elt = gimple_call_arg (stmt, 0);
4899 if (builtin_data.signature == aarch64_builtin_signatures::load_lane)
4901 elt = aarch64_dereference (elt, TREE_TYPE (types[0].type ()));
4902 elt = aarch64_force_gimple_val (gsi, elt);
4904 return aarch64_set_lane (gimple_call_lhs (stmt), elt,
4905 gimple_call_arg (stmt, 1),
4906 gimple_call_arg (stmt, 2));
4909 case UNSPEC_ST1:
4910 return aarch64_fold_store (stmt, types[1].type ());
4912 case UNSPEC_ST1_LANE:
4914 tree val = aarch64_get_lane (gimple_call_arg (stmt, 1),
4915 gimple_call_arg (stmt, 2));
4916 tree mem = aarch64_dereference (gimple_call_arg (stmt, 0),
4917 TREE_TYPE (types[0].type ()));
4918 val = aarch64_force_gimple_val (gsi, val);
4919 return aarch64_copy_vops (gimple_build_assign (mem, val), stmt);
4922 case UNSPEC_TRN1:
4923 return aarch64_fold_permute (stmt, 2, aarch64_trn_index, 0);
4925 case UNSPEC_TRN2:
4926 return aarch64_fold_permute (stmt, 2, aarch64_trn_index, 1);
4928 case UNSPEC_UZP1:
4929 return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 0);
4931 case UNSPEC_UZP2:
4932 return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 1);
4934 case UNSPEC_VCREATE:
4935 return gimple_build_assign (gimple_call_lhs (stmt),
4936 fold_build1 (VIEW_CONVERT_EXPR,
4937 types[0].type (),
4938 gimple_call_arg (stmt, 0)));
4940 case UNSPEC_VEC_COPY:
4942 tree elt = aarch64_get_lane (gimple_call_arg (stmt, 2),
4943 gimple_call_arg (stmt, 3));
4944 elt = aarch64_force_gimple_val (gsi, elt);
4945 return aarch64_set_lane (gimple_call_lhs (stmt), elt,
4946 gimple_call_arg (stmt, 0),
4947 gimple_call_arg (stmt, 1));
4950 case UNSPEC_ZIP1:
4951 return aarch64_fold_permute (stmt, 2, aarch64_zip_index, 0);
4953 case UNSPEC_ZIP2:
4954 return aarch64_fold_permute (stmt, 2, aarch64_zip_index, 1);
4956 default:
4957 return nullptr;
4961 /* Try to fold STMT, given that it's a call to the built-in function with
4962 subcode FCODE. Return the new statement on success and null on
4963 failure. */
4964 gimple *
4965 aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
4966 gimple_stmt_iterator *gsi)
4968 gimple *new_stmt = NULL;
4969 unsigned nargs = gimple_call_num_args (stmt);
4970 tree *args = (nargs > 0
4971 ? gimple_call_arg_ptr (stmt, 0)
4972 : &error_mark_node);
4974 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
4975 and unsigned int; it will distinguish according to the types of
4976 the arguments to the __builtin. */
4977 switch (fcode)
4979 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
4980 BUILTIN_VDQ_I (UNOPU, reduc_plus_scal_, 10, DEFAULT)
4981 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
4982 1, args[0]);
4983 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
4984 break;
4986 /* Lower sqrt builtins to gimple/internal function sqrt. */
4987 BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
4988 new_stmt = gimple_build_call_internal (IFN_SQRT,
4989 1, args[0]);
4990 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
4991 break;
4993 BUILTIN_VDC (BINOP, combine, 0, QUIET)
4994 BUILTIN_VD_I (BINOPU, combine, 0, DEFAULT)
4995 BUILTIN_VDC_P (BINOPP, combine, 0, DEFAULT)
4996 new_stmt = aarch64_fold_combine (stmt);
4997 break;
4999 /* Lower store and load Neon builtins to gimple. */
5000 BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
5001 BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
5002 BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
5004 enum aarch64_simd_type mem_type
5005 = get_mem_type_for_load_store (fcode);
5006 aarch64_simd_type_info_trees simd_type
5007 = aarch64_simd_types_trees[mem_type];
5008 new_stmt = aarch64_fold_load (stmt, simd_type.itype);
5009 break;
5012 BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
5013 BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
5014 BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
5016 enum aarch64_simd_type mem_type
5017 = get_mem_type_for_load_store (fcode);
5018 aarch64_simd_type_info_trees simd_type
5019 = aarch64_simd_types_trees[mem_type];
5020 new_stmt = aarch64_fold_store (stmt, simd_type.itype);
5021 break;
5024 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
5025 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
5026 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
5027 1, args[0]);
5028 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
5029 break;
5030 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
5031 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
5032 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
5033 1, args[0]);
5034 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
5035 break;
5036 BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, DEFAULT)
5037 if (TREE_CODE (args[1]) == INTEGER_CST
5038 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
5039 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5040 LSHIFT_EXPR, args[0], args[1]);
5041 break;
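/* Illustrative sketch (not from the original source): the guard above only
   folds when the shift count is a constant smaller than the element
   precision, so an immediate left shift such as vshlq_n_s32 (x, 3) becomes
   the GIMPLE expression x << 3, while a count of 32 or more is left for the
   expander to handle.  */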
5042 /* Lower saturating add/sub Neon builtins to GIMPLE.  */
5043 BUILTIN_VSDQ_I (BINOP, ssadd, 3, DEFAULT)
5044 BUILTIN_VSDQ_I (BINOPU, usadd, 3, DEFAULT)
5045 new_stmt = gimple_build_call_internal (IFN_SAT_ADD, 2, args[0], args[1]);
5046 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
5047 break;
5048 BUILTIN_VSDQ_I (BINOP, sssub, 3, DEFAULT)
5049 BUILTIN_VSDQ_I (BINOPU, ussub, 3, DEFAULT)
5050 new_stmt = gimple_build_call_internal (IFN_SAT_SUB, 2, args[0], args[1]);
5051 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
5052 break;
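/* Illustrative sketch (not from the original source): with these cases a
   saturating intrinsic such as vqaddq_u8 (a, b) is rewritten as the internal
   function .SAT_ADD (a, b) (and the subtracting forms as .SAT_SUB), letting
   the middle end recognise and optimise saturating arithmetic directly.  */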
5054 BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, DEFAULT)
5055 BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, DEFAULT)
5057 tree cst = args[1];
5058 tree ctype = TREE_TYPE (cst);
5059 /* Left shifts can be either scalar or vector, e.g. uint64x1_t is
5060 treated as a scalar type, not a vector one.  */
5061 if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
5063 wide_int wcst = wi::to_wide (cst);
5064 tree unit_ty = TREE_TYPE (cst);
5066 wide_int abs_cst = wi::abs (wcst);
5067 if (wi::geu_p (abs_cst, element_precision (args[0])))
5068 break;
5070 if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
5072 tree final_cst;
5073 final_cst = wide_int_to_tree (unit_ty, abs_cst);
5074 if (TREE_CODE (cst) != INTEGER_CST)
5075 final_cst = build_uniform_cst (ctype, final_cst);
5077 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5078 RSHIFT_EXPR, args[0],
5079 final_cst);
5081 else
5082 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5083 LSHIFT_EXPR, args[0], args[1]);
5086 break;
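/* Illustrative sketch (not from the original source): for a uniform constant
   shift amount the code above turns a negative count into a right shift by
   its absolute value, so something like vshlq_s32 (x, vdupq_n_s32 (-2))
   folds to x >> 2, while a non-negative count folds to x << n; counts whose
   magnitude reaches the element precision are left unfolded.  */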
5087 BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, DEFAULT)
5088 VAR1 (SHIFTIMM, ashr_simd, 0, DEFAULT, di)
5089 BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, DEFAULT)
5090 VAR1 (USHIFTIMM, lshr_simd, 0, DEFAULT, di)
5091 if (TREE_CODE (args[1]) == INTEGER_CST
5092 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
5093 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5094 RSHIFT_EXPR, args[0], args[1]);
5095 break;
5096 BUILTIN_GPF (BINOP, fmulx, 0, ALL)
5098 gcc_assert (nargs == 2);
5099 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
5100 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
5101 if (a0_cst_p || a1_cst_p)
5103 if (a0_cst_p && a1_cst_p)
5105 tree t0 = TREE_TYPE (args[0]);
5106 real_value a0 = (TREE_REAL_CST (args[0]));
5107 real_value a1 = (TREE_REAL_CST (args[1]));
5108 if (real_equal (&a1, &dconst0))
5109 std::swap (a0, a1);
5110 /* According to real_equal (), +0 equals -0. */
5111 if (real_equal (&a0, &dconst0) && real_isinf (&a1))
5113 real_value res = dconst2;
5114 res.sign = a0.sign ^ a1.sign;
5115 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5116 REAL_CST,
5117 build_real (t0, res));
5119 else
5120 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5121 MULT_EXPR,
5122 args[0], args[1]);
5124 else /* a0_cst_p ^ a1_cst_p. */
5126 real_value const_part = a0_cst_p
5127 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
5128 if (!real_equal (&const_part, &dconst0)
5129 && !real_isinf (&const_part))
5130 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
5131 MULT_EXPR, args[0],
5132 args[1]);
5135 if (new_stmt)
5137 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5138 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5140 break;
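/* Illustrative sketch (not from the original source): the constant folding
   above implements the FMULX special case that 0 * inf (in either order and
   with any signs) yields 2.0 whose sign is the XOR of the operand signs, so
   e.g. vmulxd_f64 (-0.0, __builtin_inf ()) folds to -2.0; a constant operand
   that is neither zero nor infinite lets the call fold to a plain
   multiplication instead.  */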
5142 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
5143 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
5145 unlink_stmt_vdef (stmt);
5146 release_defs (stmt);
5147 new_stmt = gimple_build_nop ();
5149 break;
5150 default:
5151 if (auto builtin_data = aarch64_get_pragma_builtin (fcode))
5152 new_stmt = aarch64_gimple_fold_pragma_builtin (stmt, gsi,
5153 *builtin_data);
5154 break;
5157 /* GIMPLE assign statements (unlike calls) require a non-null lhs. If we
5158 created an assign statement with a null lhs, then fix this by assigning
5159 to a new (and subsequently unused) variable. */
5160 if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs (new_stmt))
5162 tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
5163 gimple_assign_set_lhs (new_stmt, new_lhs);
5166 return new_stmt;
5169 void
5170 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
5172 const unsigned AARCH64_FE_INVALID = 1;
5173 const unsigned AARCH64_FE_DIVBYZERO = 2;
5174 const unsigned AARCH64_FE_OVERFLOW = 4;
5175 const unsigned AARCH64_FE_UNDERFLOW = 8;
5176 const unsigned AARCH64_FE_INEXACT = 16;
5177 const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
5178 | AARCH64_FE_DIVBYZERO
5179 | AARCH64_FE_OVERFLOW
5180 | AARCH64_FE_UNDERFLOW
5181 | AARCH64_FE_INEXACT);
5182 const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
5183 tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
5184 tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
5185 tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
5186 tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
5188 /* Generate the equivalent of:
5189 unsigned int fenv_cr;
5190 fenv_cr = __builtin_aarch64_get_fpcr ();
5192 unsigned int fenv_sr;
5193 fenv_sr = __builtin_aarch64_get_fpsr ();
5195 Now set all exceptions to non-stop
5196 unsigned int mask_cr
5197 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
5198 unsigned int masked_cr;
5199 masked_cr = fenv_cr & mask_cr;
5201 And clear all exception flags
5202 unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
5203 unsigned int masked_sr;
5204 masked_sr = fenv_sr & mask_sr;
5206 __builtin_aarch64_set_fpcr (masked_cr);
5207 __builtin_aarch64_set_fpsr (masked_sr); */
5209 fenv_cr = create_tmp_var_raw (unsigned_type_node);
5210 fenv_sr = create_tmp_var_raw (unsigned_type_node);
5212 get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
5213 set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
5214 get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
5215 set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
5217 mask_cr = build_int_cst (unsigned_type_node,
5218 ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
5219 mask_sr = build_int_cst (unsigned_type_node,
5220 ~(AARCH64_FE_ALL_EXCEPT));
5222 ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
5223 fenv_cr, build_call_expr (get_fpcr, 0),
5224 NULL_TREE, NULL_TREE);
5225 ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
5226 fenv_sr, build_call_expr (get_fpsr, 0),
5227 NULL_TREE, NULL_TREE);
5229 masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
5230 masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
5232 hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
5233 hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
5235 hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
5236 hold_fnclex_sr);
5237 masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
5238 masked_fenv_sr);
5239 ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
5241 *hold = build2 (COMPOUND_EXPR, void_type_node,
5242 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
5243 hold_fnclex);
5245 /* Store the value of masked_fenv to clear the exceptions:
5246 __builtin_aarch64_set_fpsr (masked_fenv_sr); */
5248 *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
5250 /* Generate the equivalent of:
5251 unsigned int new_fenv_var;
5252 new_fenv_var = __builtin_aarch64_get_fpsr ();
5254 __builtin_aarch64_set_fpsr (fenv_sr);
5256 __atomic_feraiseexcept (new_fenv_var); */
5258 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
5259 reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
5260 new_fenv_var, build_call_expr (get_fpsr, 0),
5261 NULL_TREE, NULL_TREE);
5262 restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
5263 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
5264 update_call = build_call_expr (atomic_feraiseexcept, 1,
5265 fold_convert (integer_type_node, new_fenv_var));
5266 *update = build2 (COMPOUND_EXPR, void_type_node,
5267 build2 (COMPOUND_EXPR, void_type_node,
5268 reload_fenv, restore_fnenv), update_call);
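/* Illustrative sketch (not from the original source): the HOLD, CLEAR and
   UPDATE sequences built above are what gets spliced around an atomic
   compound assignment on a floating-point type, roughly

	hold:    fenv_cr = FPCR; fenv_sr = FPSR;
		 FPCR = fenv_cr & ~traps; FPSR = fenv_sr & ~flags;
	clear:   FPSR = fenv_sr & ~flags;      (before each retry)
	update:  new_fenv_var = FPSR; FPSR = fenv_sr;
		 __atomic_feraiseexcept (new_fenv_var);

   so that FP exceptions raised by a failed attempt are discarded and only
   those from the successful iteration are re-raised.  */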
5271 /* Resolve overloaded MEMTAG built-in functions.  */
5272 #define AARCH64_BUILTIN_SUBCODE(F) \
5273 (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)
5275 static tree
5276 aarch64_resolve_overloaded_memtag (location_t loc,
5277 tree fndecl, void *pass_params)
5279 vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
5280 unsigned param_num = params ? params->length() : 0;
5281 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
5282 tree inittype = aarch64_memtag_builtin_data[
5283 fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
5284 unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;
5286 if (param_num != arg_num)
5288 TREE_TYPE (fndecl) = inittype;
5289 return NULL_TREE;
5291 tree retype = NULL;
5293 if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
5295 tree t0 = TREE_TYPE ((*params)[0]);
5296 tree t1 = TREE_TYPE ((*params)[1]);
5298 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
5299 t0 = ptr_type_node;
5300 if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
5301 t1 = ptr_type_node;
5303 if (TYPE_MODE (t0) != DImode)
5304 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
5305 (int)tree_to_shwi (DECL_SIZE ((*params)[0])));
5307 if (TYPE_MODE (t1) != DImode)
5308 warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
5309 (int)tree_to_shwi (DECL_SIZE ((*params)[1])));
5311 retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
5313 else
5315 tree t0 = TREE_TYPE ((*params)[0]);
5317 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
5319 TREE_TYPE (fndecl) = inittype;
5320 return NULL_TREE;
5323 if (TYPE_MODE (t0) != DImode)
5324 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
5325 (int)tree_to_shwi (DECL_SIZE ((*params)[0])));
5327 switch (fcode)
5329 case AARCH64_MEMTAG_BUILTIN_IRG:
5330 retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
5331 break;
5332 case AARCH64_MEMTAG_BUILTIN_GMI:
5333 retype = build_function_type_list (uint64_type_node, t0,
5334 uint64_type_node, NULL);
5335 break;
5336 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
5337 retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
5338 break;
5339 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
5340 retype = build_function_type_list (void_type_node, t0, NULL);
5341 break;
5342 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
5343 retype = build_function_type_list (t0, t0, NULL);
5344 break;
5345 default:
5346 return NULL_TREE;
5350 if (!retype || retype == error_mark_node)
5351 TREE_TYPE (fndecl) = inittype;
5352 else
5353 TREE_TYPE (fndecl) = retype;
5355 return NULL_TREE;
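/* Illustrative sketch (not from the original source): the resolver above
   specialises the MEMTAG builtins on the pointer type actually passed, so
   for an int *p a call like __builtin_aarch64_memtag_irg (p, m) is given the
   type int * (*) (int *, uint64_t) and therefore returns int * rather than
   void *; a non-64-bit pointer argument only draws a warning, while a wrong
   number of arguments makes the resolver fall back to the generic
   prototype.  */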
5358 /* Called from aarch64_resolve_overloaded_builtin in aarch64-c.cc.  */
5359 tree
5360 aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
5361 void *pass_params)
5363 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);
5365 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
5366 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
5367 return aarch64_resolve_overloaded_memtag (loc, function, pass_params);
5369 return NULL_TREE;
5372 #undef AARCH64_CHECK_BUILTIN_MODE
5373 #undef AARCH64_FIND_FRINT_VARIANT
5374 #undef CF0
5375 #undef CF1
5376 #undef CF2
5377 #undef CF3
5378 #undef CF4
5379 #undef CF10
5380 #undef VAR1
5381 #undef VAR2
5382 #undef VAR3
5383 #undef VAR4
5384 #undef VAR5
5385 #undef VAR6
5386 #undef VAR7
5387 #undef VAR8
5388 #undef VAR9
5389 #undef VAR10
5390 #undef VAR11
5392 #include "gt-aarch64-builtins.h"