/* Builtins' description for AArch64 SIMD architecture.
   Copyright (C) 2011-2025 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"
#include "stringpool.h"
#include "gimple-fold.h"
#include "aarch64-builtins.h"

using namespace aarch64;

#define v8qi_UP  E_V8QImode
#define v8di_UP  E_V8DImode
#define v4hi_UP  E_V4HImode
#define v4hf_UP  E_V4HFmode
#define v2si_UP  E_V2SImode
#define v2sf_UP  E_V2SFmode
#define v1df_UP  E_V1DFmode
#define v1di_UP  E_V1DImode
#define di_UP    E_DImode
#define df_UP    E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP  E_V8HImode
#define v8hf_UP  E_V8HFmode
#define v4si_UP  E_V4SImode
#define v4sf_UP  E_V4SFmode
#define v2di_UP  E_V2DImode
#define v2df_UP  E_V2DFmode
#define ti_UP    E_TImode
#define oi_UP    E_OImode
#define ci_UP    E_CImode
#define xi_UP    E_XImode
#define si_UP    E_SImode
#define sf_UP    E_SFmode
#define hi_UP    E_HImode
#define hf_UP    E_HFmode
#define qi_UP    E_QImode
#define bf_UP    E_BFmode
#define v4bf_UP  E_V4BFmode
#define v8bf_UP  E_V8BFmode
#define v2x8qi_UP  E_V2x8QImode
#define v2x4hi_UP  E_V2x4HImode
#define v2x4hf_UP  E_V2x4HFmode
#define v2x4bf_UP  E_V2x4BFmode
#define v2x2si_UP  E_V2x2SImode
#define v2x2sf_UP  E_V2x2SFmode
#define v2x1di_UP  E_V2x1DImode
#define v2x1df_UP  E_V2x1DFmode
#define v2x16qi_UP E_V2x16QImode
#define v2x8hi_UP  E_V2x8HImode
#define v2x8hf_UP  E_V2x8HFmode
#define v2x8bf_UP  E_V2x8BFmode
#define v2x4si_UP  E_V2x4SImode
#define v2x4sf_UP  E_V2x4SFmode
#define v2x2di_UP  E_V2x2DImode
#define v2x2df_UP  E_V2x2DFmode
#define v3x8qi_UP  E_V3x8QImode
#define v3x4hi_UP  E_V3x4HImode
#define v3x4hf_UP  E_V3x4HFmode
#define v3x4bf_UP  E_V3x4BFmode
#define v3x2si_UP  E_V3x2SImode
#define v3x2sf_UP  E_V3x2SFmode
#define v3x1di_UP  E_V3x1DImode
#define v3x1df_UP  E_V3x1DFmode
#define v3x16qi_UP E_V3x16QImode
#define v3x8hi_UP  E_V3x8HImode
#define v3x8hf_UP  E_V3x8HFmode
#define v3x8bf_UP  E_V3x8BFmode
#define v3x4si_UP  E_V3x4SImode
#define v3x4sf_UP  E_V3x4SFmode
#define v3x2di_UP  E_V3x2DImode
#define v3x2df_UP  E_V3x2DFmode
#define v4x8qi_UP  E_V4x8QImode
#define v4x4hi_UP  E_V4x4HImode
#define v4x4hf_UP  E_V4x4HFmode
#define v4x4bf_UP  E_V4x4BFmode
#define v4x2si_UP  E_V4x2SImode
#define v4x2sf_UP  E_V4x2SFmode
#define v4x1di_UP  E_V4x1DImode
#define v4x1df_UP  E_V4x1DFmode
#define v4x16qi_UP E_V4x16QImode
#define v4x8hi_UP  E_V4x8HImode
#define v4x8hf_UP  E_V4x8HFmode
#define v4x8bf_UP  E_V4x8BFmode
#define v4x4si_UP  E_V4x4SImode
#define v4x4sf_UP  E_V4x4SFmode
#define v4x2di_UP  E_V4x2DImode
#define v4x2df_UP  E_V4x2DFmode
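/* Illustration (not in the original source): v8qi_UP expands to E_V8QImode.
   These *_UP names give each machine mode a lower-case mnemonic that the
   builtin tables below can token-paste (see the UP (A) use in the VAR1
   definition for aarch64_simd_builtin_data further down, whose helper macro
   is assumed to paste A with _UP) instead of spelling out E_*mode names.  */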
#define MODE_d_bf16 E_V4BFmode
#define MODE_d_f16 E_V4HFmode
#define MODE_d_f32 E_V2SFmode
#define MODE_d_f64 E_V1DFmode
#define MODE_d_mf8 E_V8QImode
#define MODE_d_s8 E_V8QImode
#define MODE_d_s16 E_V4HImode
#define MODE_d_s32 E_V2SImode
#define MODE_d_s64 E_V1DImode
#define MODE_d_u8 E_V8QImode
#define MODE_d_u16 E_V4HImode
#define MODE_d_u32 E_V2SImode
#define MODE_d_u64 E_V1DImode
#define MODE_d_p8 E_V8QImode
#define MODE_d_p16 E_V4HImode
#define MODE_d_p64 E_V1DImode
#define MODE_q_bf16 E_V8BFmode
#define MODE_q_f16 E_V8HFmode
#define MODE_q_f32 E_V4SFmode
#define MODE_q_f64 E_V2DFmode
#define MODE_q_mf8 E_V16QImode
#define MODE_q_s8 E_V16QImode
#define MODE_q_s16 E_V8HImode
#define MODE_q_s32 E_V4SImode
#define MODE_q_s64 E_V2DImode
#define MODE_q_u8 E_V16QImode
#define MODE_q_u16 E_V8HImode
#define MODE_q_u32 E_V4SImode
#define MODE_q_u64 E_V2DImode
#define MODE_q_p8 E_V16QImode
#define MODE_q_p16 E_V8HImode
#define MODE_q_p64 E_V2DImode
#define MODE_q_p128 E_TImode

#define QUAL_bf16 qualifier_none
#define QUAL_f16 qualifier_none
#define QUAL_f32 qualifier_none
#define QUAL_f64 qualifier_none
#define QUAL_s8 qualifier_none
#define QUAL_s16 qualifier_none
#define QUAL_s32 qualifier_none
#define QUAL_s64 qualifier_none
#define QUAL_u8 qualifier_unsigned
#define QUAL_u16 qualifier_unsigned
#define QUAL_u32 qualifier_unsigned
#define QUAL_u64 qualifier_unsigned
#define QUAL_p8 qualifier_poly
#define QUAL_p16 qualifier_poly
#define QUAL_p64 qualifier_poly
#define QUAL_p128 qualifier_poly
#define QUAL_mf8 qualifier_modal_float

#define SIMD_INTR_MODE(suffix, length) MODE_##length##_##suffix
#define SIMD_INTR_QUAL(suffix) QUAL_##suffix
#define SIMD_INTR_LENGTH_CHAR(length) LENGTH_##length
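/* Worked example (illustrative): SIMD_INTR_MODE (f32, q) pastes to
   MODE_q_f32, i.e. E_V4SFmode, and SIMD_INTR_QUAL (u8) pastes to QUAL_u8,
   i.e. qualifier_unsigned.  The vreinterpret/vget intrinsic descriptions
   below use these macros to turn a type suffix plus a vector-length suffix
   (d or q) into a machine mode and a qualifier.  */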
#define SIMD_MAX_BUILTIN_ARGS 5

/* Flags that describe what a function might do.  */
const unsigned int FLAG_READ_FPCR = 1U << 0;
const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1;
const unsigned int FLAG_READ_MEMORY = 1U << 2;
const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3;
const unsigned int FLAG_WRITE_MEMORY = 1U << 4;
const unsigned int FLAG_USES_FPMR = 1U << 5;

/* Indicates that READ_FPCR and RAISE_FP_EXCEPTIONS should be set for
   floating-point modes but not for integer modes.  */
const unsigned int FLAG_AUTO_FP = 1U << 6;

const unsigned int FLAG_QUIET = 0;
const unsigned int FLAG_DEFAULT = FLAG_AUTO_FP;
const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
  | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY;
const unsigned int FLAG_LOAD = FLAG_READ_MEMORY;
const unsigned int FLAG_FP8 = FLAG_FP | FLAG_USES_FPMR;
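/* Example combinations (illustrative): a builtin tagged FLAG_FP is assumed to
   read the FPCR and possibly raise FP exceptions, so aarch64_get_attributes
   below can give it "pure" at best rather than "const"; a FLAG_DEFAULT
   (i.e. FLAG_AUTO_FP) builtin only picks up those two bits when its result
   mode is a floating-point mode.  */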
  const enum insn_code code;
  enum aarch64_type_qualifiers *qualifiers;
} aarch64_simd_builtin_datum;

static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
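/* How to read these tables (illustrative note): entry 0 gives the qualifiers
   of the return value and the remaining entries those of the arguments, so
   TYPES_BINOP describes a "signed = signed op signed-or-immediate" builtin,
   such as a vector shift whose second operand may be a literal.  The
   expansion code in aarch64_init_simd_builtin_functions below pairs each
   entry with the corresponding operand mode of the instruction pattern.  */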
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_unsigned };
#define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
#define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_pair_index };
#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSUS_LANE_QUADTUP \
  (aarch64_types_quadopssus_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_unsigned, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSSU_LANE_QUADTUP \
  (aarch64_types_quadopsssu_lane_quadtup_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none,
      qualifier_immediate };
#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate };
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
#define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
#define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_const_pointer_map_mode,
      qualifier_unsigned, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_const_pointer_map_mode,
      qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Their first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
#define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
#define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
#define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)
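/* Reading TYPES_STORE1 as a signature (illustrative): entry 0
   (qualifier_void) is the return type, entry 1 is a pointer whose pointee
   type is derived from the vector mode (qualifier_pointer_map_mode), and
   entry 2 is the vector value being stored, i.e. a
   "void (element_type *, vector_type)" shape as used by the vst1-style store
   builtins.  The _U and _P variants only change the qualifier of the stored
   value.  */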
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_unsigned, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)

constexpr insn_code CODE_FOR_aarch64_sdot_prodv8qi
  = CODE_FOR_sdot_prodv2siv8qi;
constexpr insn_code CODE_FOR_aarch64_udot_prodv8qi
  = CODE_FOR_udot_prodv2siv8qi;
constexpr insn_code CODE_FOR_aarch64_usdot_prodv8qi
  = CODE_FOR_usdot_prodv2siv8qi;
constexpr insn_code CODE_FOR_aarch64_sdot_prodv16qi
  = CODE_FOR_sdot_prodv4siv16qi;
constexpr insn_code CODE_FOR_aarch64_udot_prodv16qi
  = CODE_FOR_udot_prodv4siv16qi;
constexpr insn_code CODE_FOR_aarch64_usdot_prodv16qi
  = CODE_FOR_usdot_prodv4siv16qi;

#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X
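/* Illustration with a made-up pattern name: CF0 (foo, v4si) pastes to
   CODE_FOR_aarch64_foov4si, CF2 (foo, v4si) to CODE_FOR_foov4si2 and
   CF10 (foo, v4si) to CODE_FOR_foov4si.  The MAP field of each entry in
   aarch64-simd-builtins.def selects which of these naming schemes links a
   builtin to its insn pattern (see the CF##MAP use in VAR1 below).  */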
/* Define cascading VAR<N> macros that are used from
   aarch64-builtin-iterators.h to iterate over modes.  These definitions
   will end up generating a number of VAR1 expansions and code later on in the
   file should redefine VAR1 to whatever it needs to process on a per-mode
   basis.  */
#define VAR2(T, N, MAP, FLAG, A, B) \
  VAR1 (T, N, MAP, FLAG, A) \
  VAR1 (T, N, MAP, FLAG, B)
#define VAR3(T, N, MAP, FLAG, A, B, C) \
  VAR2 (T, N, MAP, FLAG, A, B) \
  VAR1 (T, N, MAP, FLAG, C)
#define VAR4(T, N, MAP, FLAG, A, B, C, D) \
  VAR3 (T, N, MAP, FLAG, A, B, C) \
  VAR1 (T, N, MAP, FLAG, D)
#define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
  VAR4 (T, N, MAP, FLAG, A, B, C, D) \
  VAR1 (T, N, MAP, FLAG, E)
#define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
  VAR1 (T, N, MAP, FLAG, F)
#define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, FLAG, G)
#define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, FLAG, H)
#define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, FLAG, I)
#define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, FLAG, J)
#define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, FLAG, K)
#define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, FLAG, L)
#define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, FLAG, M)
#define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, FLAG, N)
#define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR1 (T, X, MAP, FLAG, O)
#define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
  VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
  VAR1 (T, X, MAP, FLAG, P)
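/* For instance, a hypothetical VAR3 (BINOP, foo, 0, DEFAULT, v8qi, v4hi, v2si)
   recursively expands to three VAR1 invocations, one per mode, so whatever
   definition of VAR1 is in force at that point runs once for each of v8qi,
   v4hi and v2si.  (Illustrative expansion; the real invocation lists come
   from aarch64-builtin-iterators.h and aarch64-simd-builtins.def.)  */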
#include "aarch64-builtin-iterators.h"

/* The builtins below should be expanded through the standard optabs
   CODE_FOR_[u]avg<mode>3_[floor,ceil].  However the mapping scheme in
   aarch64-simd-builtins.def does not easily allow us to have a pre-mode
   ("uavg") and post-mode string ("_ceil") in the CODE_FOR_* construction.
   So the builtins use a name that is natural for AArch64 instructions
   e.g. "aarch64_srhadd<mode>" and we re-map these to the optab-related
   CODE_FOR_* here.  */
#define VAR1(F,T1,T2,I,M) \
constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T1##M##3##T2;

BUILTIN_VDQ_BHSI (srhadd, avg, _ceil, 0)
BUILTIN_VDQ_BHSI (urhadd, uavg, _ceil, 0)
BUILTIN_VDQ_BHSI (shadd, avg, _floor, 0)
BUILTIN_VDQ_BHSI (uhadd, uavg, _floor, 0)
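/* Each invocation above is fanned out over the VDQ_BHSI modes by the VAR<N>
   macros from aarch64-builtin-iterators.h; for V8QI, for example, the srhadd
   line expands (illustratively) to
     constexpr insn_code CODE_FOR_aarch64_srhaddv8qi = CODE_FOR_avgv8qi3_ceil;
   so the builtin keeps its natural aarch64_* name while resolving to the
   standard optab pattern.  */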
/* The builtins below should be expanded through the standard optabs
   CODE_FOR_extend<mode><Vwide>2.  */
#undef VAR1
#define VAR1(F,T,N,M) \
constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T##N##M##2;

VAR1 (float_extend_lo_, extend, v2sf, v2df)
VAR1 (float_extend_lo_, extend, v4hf, v4sf)
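/* Expanding the first invocation above mechanically gives (illustrative)
     constexpr insn_code CODE_FOR_aarch64_float_extend_lo_v2df
       = CODE_FOR_extendv2sfv2df2;
   i.e. the AArch64-named builtin aliases the standard extend optab pattern
   that widens V2SF to V2DF.  */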
/* __builtin_aarch64_float_truncate_lo_<mode> should be expanded through the
   standard optabs CODE_FOR_trunc<Vwide><mode>2.  */
constexpr insn_code CODE_FOR_aarch64_float_truncate_lo_v4hf
  = CODE_FOR_truncv4sfv4hf2;
constexpr insn_code CODE_FOR_aarch64_float_truncate_lo_v2sf
  = CODE_FOR_truncv2dfv2sf2;

#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
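/* With this definition of VAR1, every entry in aarch64-simd-builtins.def
   becomes one row of the table below.  For a hypothetical BINOP builtin "foo"
   in mode v4sf with MAP 0 and the FP flag, the row would look roughly like
     {"foov4sf", E_V4SFmode, CODE_FOR_aarch64_foov4sf, 0, TYPES_BINOP, FLAG_FP},
   assuming UP (v4sf) resolves to v4sf_UP, i.e. E_V4SFmode, via the *_UP
   macros above (illustrative expansion only).  */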
static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
#include "aarch64-simd-builtins.def"

/* There are only 8 CRC32 builtins.  Probably not worth their own .def file.  */
#define AARCH64_CRC32_BUILTINS \
  CRC32_BUILTIN (crc32b, QI) \
  CRC32_BUILTIN (crc32h, HI) \
  CRC32_BUILTIN (crc32w, SI) \
  CRC32_BUILTIN (crc32x, DI) \
  CRC32_BUILTIN (crc32cb, QI) \
  CRC32_BUILTIN (crc32ch, HI) \
  CRC32_BUILTIN (crc32cw, SI) \
  CRC32_BUILTIN (crc32cx, DI)
/* The next 8 FCMLA intrinsics require some special handling compared to the
   normal simd intrinsics.  */
#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
  FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true)
/* vreinterpret intrinsics are defined for any pair of element types.
   { _f16 _f32 _f64 }        { _f16 _f32 _f64 }
   { _s8 _s16 _s32 _s64 }  x { _s8 _s16 _s32 _s64 }
   { _u8 _u16 _u32 _u64 }    { _u8 _u16 _u32 _u64 }
   { _p8 _p16 _p64 }         { _p8 _p16 _p64 }.  */
#define VREINTERPRET_BUILTIN2(A, B) \
  VREINTERPRET_BUILTIN (A, B, d)

#define VREINTERPRET_BUILTINS1(A) \
  VREINTERPRET_BUILTIN2 (A, bf16) \
  VREINTERPRET_BUILTIN2 (A, f16) \
  VREINTERPRET_BUILTIN2 (A, f32) \
  VREINTERPRET_BUILTIN2 (A, f64) \
  VREINTERPRET_BUILTIN2 (A, mf8) \
  VREINTERPRET_BUILTIN2 (A, s8) \
  VREINTERPRET_BUILTIN2 (A, s16) \
  VREINTERPRET_BUILTIN2 (A, s32) \
  VREINTERPRET_BUILTIN2 (A, s64) \
  VREINTERPRET_BUILTIN2 (A, u8) \
  VREINTERPRET_BUILTIN2 (A, u16) \
  VREINTERPRET_BUILTIN2 (A, u32) \
  VREINTERPRET_BUILTIN2 (A, u64) \
  VREINTERPRET_BUILTIN2 (A, p8) \
  VREINTERPRET_BUILTIN2 (A, p16) \
  VREINTERPRET_BUILTIN2 (A, p64)

#define VREINTERPRET_BUILTINS \
  VREINTERPRET_BUILTINS1 (bf16) \
  VREINTERPRET_BUILTINS1 (f16) \
  VREINTERPRET_BUILTINS1 (f32) \
  VREINTERPRET_BUILTINS1 (f64) \
  VREINTERPRET_BUILTINS1 (mf8) \
  VREINTERPRET_BUILTINS1 (s8) \
  VREINTERPRET_BUILTINS1 (s16) \
  VREINTERPRET_BUILTINS1 (s32) \
  VREINTERPRET_BUILTINS1 (s64) \
  VREINTERPRET_BUILTINS1 (u8) \
  VREINTERPRET_BUILTINS1 (u16) \
  VREINTERPRET_BUILTINS1 (u32) \
  VREINTERPRET_BUILTINS1 (u64) \
  VREINTERPRET_BUILTINS1 (p8) \
  VREINTERPRET_BUILTINS1 (p16) \
  VREINTERPRET_BUILTINS1 (p64)

/* vreinterpretq intrinsics are additionally defined for p128.
   { _f16 _f32 _f64 }          { _f16 _f32 _f64 }
   { _s8 _s16 _s32 _s64 }    x { _s8 _s16 _s32 _s64 }
   { _u8 _u16 _u32 _u64 }      { _u8 _u16 _u32 _u64 }
   { _p8 _p16 _p64 _p128 }     { _p8 _p16 _p64 _p128 }.  */
#define VREINTERPRETQ_BUILTIN2(A, B) \
  VREINTERPRET_BUILTIN (A, B, q)

#define VREINTERPRETQ_BUILTINS1(A) \
  VREINTERPRETQ_BUILTIN2 (A, bf16) \
  VREINTERPRETQ_BUILTIN2 (A, f16) \
  VREINTERPRETQ_BUILTIN2 (A, f32) \
  VREINTERPRETQ_BUILTIN2 (A, f64) \
  VREINTERPRETQ_BUILTIN2 (A, mf8) \
  VREINTERPRETQ_BUILTIN2 (A, s8) \
  VREINTERPRETQ_BUILTIN2 (A, s16) \
  VREINTERPRETQ_BUILTIN2 (A, s32) \
  VREINTERPRETQ_BUILTIN2 (A, s64) \
  VREINTERPRETQ_BUILTIN2 (A, u8) \
  VREINTERPRETQ_BUILTIN2 (A, u16) \
  VREINTERPRETQ_BUILTIN2 (A, u32) \
  VREINTERPRETQ_BUILTIN2 (A, u64) \
  VREINTERPRETQ_BUILTIN2 (A, p8) \
  VREINTERPRETQ_BUILTIN2 (A, p16) \
  VREINTERPRETQ_BUILTIN2 (A, p64) \
  VREINTERPRETQ_BUILTIN2 (A, p128)

#define VREINTERPRETQ_BUILTINS \
  VREINTERPRETQ_BUILTINS1 (bf16) \
  VREINTERPRETQ_BUILTINS1 (f16) \
  VREINTERPRETQ_BUILTINS1 (f32) \
  VREINTERPRETQ_BUILTINS1 (f64) \
  VREINTERPRETQ_BUILTINS1 (mf8) \
  VREINTERPRETQ_BUILTINS1 (s8) \
  VREINTERPRETQ_BUILTINS1 (s16) \
  VREINTERPRETQ_BUILTINS1 (s32) \
  VREINTERPRETQ_BUILTINS1 (s64) \
  VREINTERPRETQ_BUILTINS1 (u8) \
  VREINTERPRETQ_BUILTINS1 (u16) \
  VREINTERPRETQ_BUILTINS1 (u32) \
  VREINTERPRETQ_BUILTINS1 (u64) \
  VREINTERPRETQ_BUILTINS1 (p8) \
  VREINTERPRETQ_BUILTINS1 (p16) \
  VREINTERPRETQ_BUILTINS1 (p64) \
  VREINTERPRETQ_BUILTINS1 (p128)

#define AARCH64_SIMD_VREINTERPRET_BUILTINS \
  VREINTERPRET_BUILTINS \
  VREINTERPRETQ_BUILTINS

#define AARCH64_SIMD_VGET_LOW_BUILTINS \
  VGET_LOW_BUILTIN(mf8) \
  VGET_LOW_BUILTIN(f16) \
  VGET_LOW_BUILTIN(f32) \
  VGET_LOW_BUILTIN(f64) \
  VGET_LOW_BUILTIN(p8) \
  VGET_LOW_BUILTIN(p16) \
  VGET_LOW_BUILTIN(p64) \
  VGET_LOW_BUILTIN(s8) \
  VGET_LOW_BUILTIN(s16) \
  VGET_LOW_BUILTIN(s32) \
  VGET_LOW_BUILTIN(s64) \
  VGET_LOW_BUILTIN(u8) \
  VGET_LOW_BUILTIN(u16) \
  VGET_LOW_BUILTIN(u32) \
  VGET_LOW_BUILTIN(u64) \
  VGET_LOW_BUILTIN(bf16)

#define AARCH64_SIMD_VGET_HIGH_BUILTINS \
  VGET_HIGH_BUILTIN(mf8) \
  VGET_HIGH_BUILTIN(f16) \
  VGET_HIGH_BUILTIN(f32) \
  VGET_HIGH_BUILTIN(f64) \
  VGET_HIGH_BUILTIN(p8) \
  VGET_HIGH_BUILTIN(p16) \
  VGET_HIGH_BUILTIN(p64) \
  VGET_HIGH_BUILTIN(s8) \
  VGET_HIGH_BUILTIN(s16) \
  VGET_HIGH_BUILTIN(s32) \
  VGET_HIGH_BUILTIN(s64) \
  VGET_HIGH_BUILTIN(u8) \
  VGET_HIGH_BUILTIN(u16) \
  VGET_HIGH_BUILTIN(u32) \
  VGET_HIGH_BUILTIN(u64) \
  VGET_HIGH_BUILTIN(bf16)
  const enum insn_code icode;
} aarch64_crc_builtin_datum;

/* Hold information about how to expand the FCMLA_LANEQ builtins.  */
  const enum insn_code icode;
} aarch64_fcmla_laneq_builtin_datum;

/* Hold information about how to declare SIMD intrinsics.  */
  unsigned int op_count;
  machine_mode op_modes[SIMD_MAX_BUILTIN_ARGS];
  enum aarch64_type_qualifiers qualifiers[SIMD_MAX_BUILTIN_ARGS];
} aarch64_simd_intrinsic_datum;

#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,

#define VREINTERPRET_BUILTIN(A, B, L) \
  AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B,

#define VGET_LOW_BUILTIN(A) \
  AARCH64_SIMD_BUILTIN_VGET_LOW_##A,

#define VGET_HIGH_BUILTIN(A) \
  AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,

#undef VAR1
#define VAR1(T, N, MAP, FLAG, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,
#define ENTRY(N, S, T0, T1, T2, T3, U, F) \
enum aarch64_builtins
  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,
  AARCH64_BUILTIN_GET_FPCR64,
  AARCH64_BUILTIN_SET_FPCR64,
  AARCH64_BUILTIN_GET_FPSR64,
  AARCH64_BUILTIN_SET_FPSR64,
  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
			     + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* SIMD intrinsic builtins.  */
  AARCH64_SIMD_VREINTERPRET_BUILTINS
  AARCH64_SIMD_VGET_LOW_BUILTINS
  AARCH64_SIMD_VGET_HIGH_BUILTINS
  /* ARMv8.3-A Pointer Authentication Builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_AUTIB1716,
  AARCH64_PAUTH_BUILTIN_PACIB1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins.  */
  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for Armv8.3-A Javascript conversion instruction.  */
  AARCH64_TME_BUILTIN_TSTART,
  AARCH64_TME_BUILTIN_TCOMMIT,
  AARCH64_TME_BUILTIN_TTEST,
  AARCH64_TME_BUILTIN_TCANCEL,
  /* Armv8.5-a RNG instruction builtins.  */
  AARCH64_BUILTIN_RNG_RNDR,
  AARCH64_BUILTIN_RNG_RNDRRS,
  /* MEMTAG builtins.  */
  AARCH64_MEMTAG_BUILTIN_START,
  AARCH64_MEMTAG_BUILTIN_IRG,
  AARCH64_MEMTAG_BUILTIN_GMI,
  AARCH64_MEMTAG_BUILTIN_SUBP,
  AARCH64_MEMTAG_BUILTIN_INC_TAG,
  AARCH64_MEMTAG_BUILTIN_SET_TAG,
  AARCH64_MEMTAG_BUILTIN_GET_TAG,
  AARCH64_MEMTAG_BUILTIN_END,
  AARCH64_LS64_BUILTIN_LD64B,
  AARCH64_LS64_BUILTIN_ST64B,
  AARCH64_LS64_BUILTIN_ST64BV,
  AARCH64_LS64_BUILTIN_ST64BV0,
  /* Pragma builtins.  */
  AARCH64_PRAGMA_BUILTIN_START,
#include "aarch64-simd-pragma-builtins.def"
  AARCH64_PRAGMA_BUILTIN_END,
  /* System register builtins.  */
  /* Armv8.9-A / Armv9.4-A builtins.  */
  AARCH64_BUILTIN_CHKFEAT,
  AARCH64_BUILTIN_GCSPR,
  AARCH64_BUILTIN_GCSPOPM,
  AARCH64_BUILTIN_GCSSS,
#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS

#undef FCMLA_LANEQ_BUILTIN
#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
/* This structure describes how to map each builtin to the instruction to
   generate in the backend and how to invoke that instruction.  */
static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
#undef VREINTERPRET_BUILTIN
#define VREINTERPRET_BUILTIN(A, B, L) \
  {"vreinterpret" SIMD_INTR_LENGTH_CHAR(L) "_" #A "_" #B, \
   AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B, \
   { SIMD_INTR_MODE(A, L), SIMD_INTR_MODE(B, L) }, \
   { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(B) }, \
   SIMD_INTR_MODE(A, L) == SIMD_INTR_MODE(B, L) \
   && SIMD_INTR_QUAL(A) == SIMD_INTR_QUAL(B) \

#undef VGET_LOW_BUILTIN
#define VGET_LOW_BUILTIN(A) \
  AARCH64_SIMD_BUILTIN_VGET_LOW_##A, \
  { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
  { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \

#undef VGET_HIGH_BUILTIN
#define VGET_HIGH_BUILTIN(A) \
  AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
  { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
  { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \

static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
  AARCH64_SIMD_VREINTERPRET_BUILTINS
  AARCH64_SIMD_VGET_LOW_BUILTINS
  AARCH64_SIMD_VGET_HIGH_BUILTINS
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation defined.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  "__builtin_aarch64_simd_bf",

extern const aarch64_simd_type_info aarch64_simd_types[];
extern GTY(()) aarch64_simd_type_info_trees aarch64_simd_types_trees[];

#define ENTRY(E, M, Q, G) \
  {E, "__" #E, #G "__" #E, E_##M##mode, qualifier_##Q},
const struct aarch64_simd_type_info aarch64_simd_types[] = {
#include "aarch64-simd-builtin-types.def"

struct aarch64_simd_type_info_trees
aarch64_simd_types_trees[ARRAY_SIZE (aarch64_simd_types)];

static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];

static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __mfp8 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_mfp8_type_node = NULL_TREE;
tree aarch64_mfp8_ptr_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

/* Back-end node type for brain float (bfloat) types.  */
tree aarch64_bf16_ptr_type_node = NULL_TREE;
/* Wrapper around add_builtin_function.  NAME is the name of the built-in
   function, TYPE is the function type, CODE is the function subcode
   (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
   attributes.  */
aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
                             tree attrs = NULL_TREE)
  code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
  return add_builtin_function (name, type, code, BUILT_IN_MD,
aarch64_general_simulate_builtin (const char *name, tree fntype,
                                  tree attrs = NULL_TREE)
  code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
  return simulate_builtin_function_decl (input_location, name, fntype,

aarch64_mangle_builtin_scalar_type (const_tree type)
  while (aarch64_scalar_builtin_types[i] != NULL)
      const char *name = aarch64_scalar_builtin_types[i];

      if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
          && DECL_NAME (TYPE_NAME (type))
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
        return aarch64_scalar_builtin_types[i];

aarch64_mangle_builtin_vector_type (const_tree type)
  tree attrs = TYPE_ATTRIBUTES (type);
  if (tree attr = lookup_attribute ("Advanced SIMD type", attrs))
      tree mangled_name = TREE_VALUE (TREE_VALUE (attr));
      return IDENTIFIER_POINTER (mangled_name);
aarch64_general_mangle_builtin_type (const_tree type)
  /* Walk through all the AArch64 builtins types tables to filter out the
     incoming type.  */
  if ((mangle = aarch64_mangle_builtin_vector_type (type))
      || (mangle = aarch64_mangle_builtin_scalar_type (type)))
/* Helper function for aarch64_simd_builtin_type.  */
aarch64_int_or_fp_type (machine_mode mode,
                        enum aarch64_type_qualifiers qualifiers)
#define QUAL_TYPE(M) ((qualifiers & qualifier_unsigned) \
                      ? unsigned_int##M##_type_node : int##M##_type_node);
      if (qualifiers & qualifier_modal_float)
        return aarch64_mfp8_type_node;
      return QUAL_TYPE (QI);
      return QUAL_TYPE (HI);
      return QUAL_TYPE (SI);
      return QUAL_TYPE (DI);
      return QUAL_TYPE (TI);
      return aarch64_simd_intOI_type_node;
      return aarch64_simd_intCI_type_node;
      return aarch64_simd_intXI_type_node;
      return aarch64_fp16_type_node;
      return float_type_node;
      return double_type_node;
      return bfloat16_type_node;

/* Helper function for aarch64_simd_builtin_type.  */
aarch64_lookup_simd_type_in_table (machine_mode mode,
                                   enum aarch64_type_qualifiers qualifiers)
  int nelts = ARRAY_SIZE (aarch64_simd_types);
           & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);

  for (i = 0; i < nelts; i++)
      if (aarch64_simd_types[i].mode == mode
          && aarch64_simd_types[i].q == q)
        return aarch64_simd_types_trees[i].itype;
      if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
        for (int j = 0; j < 3; j++)
          if (aarch64_simd_tuple_modes[i][j] == mode
              && aarch64_simd_types[i].q == q)
            return aarch64_simd_tuple_types[i][j];

/* Return a type for an operand with specified mode and qualifiers.  */
aarch64_simd_builtin_type (machine_mode mode,
                           enum aarch64_type_qualifiers qualifiers)
  tree type = NULL_TREE;

  /* For pointers, we want a pointer to the basic type of the vector.  */
  if ((qualifiers & qualifier_pointer) && VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
    type = aarch64_lookup_simd_type_in_table (mode, qualifiers);
    type = aarch64_int_or_fp_type (mode, qualifiers);

  gcc_assert (type != NULL_TREE);

  /* Add qualifiers.  */
  if (qualifiers & qualifier_const)
    type = build_qualified_type (type, TYPE_QUAL_CONST);
  if (qualifiers & qualifier_pointer)
    type = build_pointer_type (type);
aarch64_init_simd_builtin_types (void)
  int nelts = ARRAY_SIZE (aarch64_simd_types);

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types_trees[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types_trees[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types_trees[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types_trees[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types_trees[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types_trees[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types_trees[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types_trees[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types_trees[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types_trees[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types_trees[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types_trees[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types_trees[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types_trees[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types_trees[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types_trees[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  */
  aarch64_simd_types_trees[Poly8_t].eltype
    = aarch64_simd_types_trees[Poly8_t].itype
    = build_distinct_type_copy (unsigned_intQI_type_node);
  /* Prevent front-ends from transforming Poly8_t arrays into string
     literals.  */
  TYPE_STRING_FLAG (aarch64_simd_types_trees[Poly8_t].eltype) = false;
  aarch64_simd_types_trees[Poly16_t].eltype
    = aarch64_simd_types_trees[Poly16_t].itype
    = build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types_trees[Poly64_t].eltype
    = aarch64_simd_types_trees[Poly64_t].itype
    = build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types_trees[Poly128_t].eltype
    = aarch64_simd_types_trees[Poly128_t].itype
    = build_distinct_type_copy (unsigned_intTI_type_node);
  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types_trees[Poly8x8_t].eltype
    = aarch64_simd_types_trees[Poly8_t].itype;
  aarch64_simd_types_trees[Poly8x16_t].eltype
    = aarch64_simd_types_trees[Poly8_t].itype;
  aarch64_simd_types_trees[Poly16x4_t].eltype
    = aarch64_simd_types_trees[Poly16_t].itype;
  aarch64_simd_types_trees[Poly16x8_t].eltype
    = aarch64_simd_types_trees[Poly16_t].itype;
  aarch64_simd_types_trees[Poly64x1_t].eltype
    = aarch64_simd_types_trees[Poly64_t].itype;
  aarch64_simd_types_trees[Poly64x2_t].eltype
    = aarch64_simd_types_trees[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types_trees[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types_trees[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types_trees[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types_trees[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types_trees[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types_trees[Float64x2_t].eltype = double_type_node;

  /* Init Bfloat vector types with underlying __bf16 type.  */
  aarch64_simd_types_trees[Bfloat16x4_t].eltype = bfloat16_type_node;
  aarch64_simd_types_trees[Bfloat16x8_t].eltype = bfloat16_type_node;

  /* Init FP8 element types.  */
  aarch64_simd_types_trees[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
  aarch64_simd_types_trees[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;

  for (i = 0; i < nelts; i++)
      tree eltype = aarch64_simd_types_trees[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      if (aarch64_simd_types_trees[i].itype == NULL)
          tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
          type = build_distinct_type_copy (type);
          SET_TYPE_STRUCTURAL_EQUALITY (type);

          tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
          tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
          TYPE_ATTRIBUTES (type)
            = tree_cons (get_identifier ("Advanced SIMD type"), value,
                         TYPE_ATTRIBUTES (type));
          aarch64_simd_types_trees[i].itype = type;

      tdecl = add_builtin_type (aarch64_simd_types[i].name,
                                aarch64_simd_types_trees[i].itype);
      TYPE_NAME (aarch64_simd_types_trees[i].itype) = tdecl;

#define AARCH64_BUILD_SIGNED_TYPE(mode) \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
aarch64_init_simd_builtin_scalar_types (void)
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
      "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
      "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
      "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
      "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
      "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
      "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
      "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
      "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
      "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
      "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
      "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
      "__builtin_aarch64_simd_ti");
  (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
      "__builtin_aarch64_simd_bf");
  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
      "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
      "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
      "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
      "__builtin_aarch64_simd_udi");
/* If MODE is a single Advanced SIMD vector, return the number of lanes in the
   vector.  If MODE is an Advanced SIMD structure/tuple mode, return the number
   of lanes in a single vector.  */
aarch64_num_lanes (machine_mode mode)
  unsigned int nregs = targetm.hard_regno_nregs (V0_REGNUM, mode);
  return exact_div (GET_MODE_NUNITS (mode), nregs).to_constant ();
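/* Illustrative values: for V4SImode a single vector occupies one register,
   so this returns 4; for a two-register tuple mode such as V2x4SImode, nregs
   is 2 and the result is still 4 lanes per constituent vector.  */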
/* Return a set of FLAG_* flags derived from FLAGS
   that describe what a function with result MODE could do,
   taking the command-line flags into account.  */
aarch64_call_properties (unsigned int flags, machine_mode mode)
  if ((flags & FLAG_AUTO_FP) && FLOAT_MODE_P (mode))

  /* -fno-trapping-math means that we can assume any FP exceptions
     are not user-visible.  */
  if (!flag_trapping_math)
    flags &= ~FLAG_RAISE_FP_EXCEPTIONS;

/* Return true if calls to a function with flags F and mode MODE
   could modify some form of global state.  */
aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
  unsigned int flags = aarch64_call_properties (f, mode);

  if (flags & FLAG_RAISE_FP_EXCEPTIONS)

  if (flags & FLAG_PREFETCH_MEMORY)

  return flags & FLAG_WRITE_MEMORY;

/* Return true if calls to a function with flags F and mode MODE
   could read some form of global state.  */
aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
  unsigned int flags = aarch64_call_properties (f, mode);

  if (flags & FLAG_READ_FPCR)

  return flags & FLAG_READ_MEMORY;

/* Return true if calls to a function with flags F and mode MODE
   could raise a signal.  */
aarch64_could_trap_p (unsigned int f, machine_mode mode)
  unsigned int flags = aarch64_call_properties (f, mode);

  if (flags & FLAG_RAISE_FP_EXCEPTIONS)

  if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY))

/* Add attribute NAME to ATTRS.  */
aarch64_add_attribute (const char *name, tree attrs)
  return tree_cons (get_identifier (name), NULL_TREE, attrs);
/* Return the appropriate attributes for a function that has
   flags F and mode MODE.  */
aarch64_get_attributes (unsigned int f, machine_mode mode)
  tree attrs = NULL_TREE;

  if (!aarch64_modifies_global_state_p (f, mode))
      if (aarch64_reads_global_state_p (f, mode))
        attrs = aarch64_add_attribute ("pure", attrs);
        attrs = aarch64_add_attribute ("const", attrs);

  if (!flag_non_call_exceptions || !aarch64_could_trap_p (f, mode))
    attrs = aarch64_add_attribute ("nothrow", attrs);

  return aarch64_add_attribute ("leaf", attrs);
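/* Example outcome (illustrative): a FLAG_FP builtin that neither writes nor
   prefetches memory is not treated as modifying global state, but it does
   read the FPCR, so it ends up with "pure" (plus "nothrow" unless
   -fnon-call-exceptions is on and it can trap, and always "leaf"); a
   FLAG_QUIET integer builtin would get "const" instead of "pure".  */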
/* Due to the architecture not providing a lane variant of the fcmla
   instructions we can't use the standard simd builtin expansion code, but we
   still want the majority of the validation that would normally be done.  */
aarch64_init_fcmla_laneq_builtins (void)
  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
      aarch64_fcmla_laneq_builtin_datum *d
        = &aarch64_fcmla_lane_builtin_data[i];
      tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_none);
      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
      tree quadtype = aarch64_simd_builtin_type (quadmode, qualifier_none);
        = aarch64_simd_builtin_type (SImode, qualifier_lane_pair_index);
      tree ftype = build_function_type_list (argtype, argtype, argtype,
                                             quadtype, lanetype, NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
        = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);

      aarch64_builtin_decls[d->fcode] = fndecl;
aarch64_init_simd_intrinsics (void)
  for (i = 0; i < ARRAY_SIZE (aarch64_simd_intrinsic_data); ++i)
      auto d = &aarch64_simd_intrinsic_data[i];

      tree return_type = void_type_node;
      tree args = void_list_node;

      for (int op_num = d->op_count - 1; op_num >= 0; op_num--)
          machine_mode op_mode = d->op_modes[op_num];
          enum aarch64_type_qualifiers qualifiers = d->qualifiers[op_num];

          tree eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
            return_type = eltype;
            args = tree_cons (NULL_TREE, eltype, args);

      tree ftype = build_function_type (return_type, args);
      tree attrs = aarch64_get_attributes (d->flags, d->op_modes[0]);
        = (d->fcode << AARCH64_BUILTIN_SHIFT | AARCH64_BUILTIN_GENERAL);
      tree fndecl = simulate_builtin_function_decl (input_location, d->name,
                                                    ftype, code, NULL, attrs);
      aarch64_builtin_decls[d->fcode] = fndecl;
aarch64_init_simd_builtin_functions (bool called_from_pragma)
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (!called_from_pragma)
      tree lane_check_fpr = build_function_type_list (void_type_node,
      /* aarch64_im_lane_boundsi should be leaf and nothrow as it
         is expanded as nop or will cause a user error.  */
      tree attrs = aarch64_add_attribute ("nothrow", NULL_TREE);
      attrs = aarch64_add_attribute ("leaf", attrs);
      aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
        = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
                                       AARCH64_SIMD_BUILTIN_LANE_CHECK, attrs);

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];

      /* We must track two variables here.  op_num is
         the operand number as in the RTL pattern.  This is
         required to access the mode (e.g. V4SF mode) of the
         argument, from which the base type can be derived.
         arg_num is an index in to the qualifiers data, which
         gives qualifiers to the type (e.g. const unsigned).
         The reason these two variables may differ by one is the
         void return type.  While all return types take the 0th entry
         in the qualifiers array, there is no operand for them in the
         RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void

      tree return_type = void_type_node, args = void_list_node;

      int struct_mode_args = 0;
      for (int j = op_num; j >= 0; j--)
          machine_mode op_mode = insn_data[d->code].operand[j].mode;
          if (aarch64_advsimd_struct_mode_p (op_mode))

      if ((called_from_pragma && struct_mode_args == 0)
          || (!called_from_pragma && struct_mode_args > 0))

      /* Build a function type directly from the insn_data for this
         builtin.  The build_function_type () function takes care of
         removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
          machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
          enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

          if (qualifiers & qualifier_unsigned)
              type_signature[op_num] = 'u';
              print_type_signature_p = true;
          else if (qualifiers & qualifier_poly)
              type_signature[op_num] = 'p';
              print_type_signature_p = true;
            type_signature[op_num] = 's';

          /* Some builtins have different user-facing types
             for certain arguments, encoded in d->mode.  */
          if (qualifiers & qualifier_map_mode)

          eltype = aarch64_simd_builtin_type (op_mode, qualifiers);

          /* If we have reached arg_num == 0, we are at a non-void
             return type.  Otherwise, we are still processing
             arguments.  */
            return_type = eltype;
            args = tree_cons (NULL_TREE, eltype, args);

      ftype = build_function_type (return_type, args);

      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
        snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
                  d->name, type_signature);
        snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",

      tree attrs = aarch64_get_attributes (d->flags, d->mode);

      if (called_from_pragma)
          unsigned int raw_code
            = (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
          fndecl = simulate_builtin_function_decl (input_location, namebuf,
                                                   ftype, raw_code, NULL,
        fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);

      aarch64_builtin_decls[fcode] = fndecl;
enum class aarch64_builtin_signatures
{
  binary,
  binary_lane,
  binary_two_lanes,
  load,
  load_lane,
  store,
  store_lane,
  ternary,
  ternary_lane,
  unary,
  unary_lane,
};

/* Pairs a machine mode with the information needed to turn it into a
   function argument type or return type.  */
struct simd_type
{
  tree type () const { return aarch64_simd_builtin_type (mode, qualifiers); }
  unsigned nunits () const { return GET_MODE_NUNITS (mode).to_constant (); }

  machine_mode mode;
  aarch64_type_qualifiers qualifiers;
};

namespace simd_types {

#define VARIANTS(BASE, D, Q, MODE, QUALIFIERS) \
  constexpr simd_type BASE { V##D##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##x2 { V2x##D##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##x3 { V3x##D##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##x4 { V4x##D##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##q { V##Q##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##qx2 { V2x##Q##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##qx3 { V3x##Q##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##qx4 { V4x##Q##MODE, QUALIFIERS }; \
  constexpr simd_type BASE##_scalar { MODE, QUALIFIERS }; \
  constexpr simd_type BASE##_scalar_ptr \
    { MODE, aarch64_type_qualifiers (QUALIFIERS | qualifier_pointer) }; \
  constexpr simd_type BASE##_scalar_const_ptr \
    { MODE, aarch64_type_qualifiers (QUALIFIERS | qualifier_const_pointer) };
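
/* For reference, a hand-written sketch of what a single invocation of
   VARIANTS expands to (derived from the macro above; not itself part of
   the generated definitions):

     VARIANTS (s8, 8, 16, QImode, qualifier_none)

   defines, among the other variants:

     constexpr simd_type s8 { V8QImode, qualifier_none };
     constexpr simd_type s8q { V16QImode, qualifier_none };
     constexpr simd_type s8x2 { V2x8QImode, qualifier_none };
     constexpr simd_type s8_scalar { QImode, qualifier_none };
     constexpr simd_type s8_scalar_const_ptr
       { QImode, aarch64_type_qualifiers (qualifier_none
					  | qualifier_const_pointer) };  */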
VARIANTS (mf8, 8, 16, QImode, qualifier_modal_float)
VARIANTS (p8, 8, 16, QImode, qualifier_poly)
VARIANTS (s8, 8, 16, QImode, qualifier_none)
VARIANTS (u8, 8, 16, QImode, qualifier_unsigned)

VARIANTS (bf16, 4, 8, BFmode, qualifier_none)
VARIANTS (f16, 4, 8, HFmode, qualifier_none)
VARIANTS (p16, 4, 8, HImode, qualifier_poly)
VARIANTS (s16, 4, 8, HImode, qualifier_none)
VARIANTS (u16, 4, 8, HImode, qualifier_unsigned)

VARIANTS (f32, 2, 4, SFmode, qualifier_none)
VARIANTS (p32, 2, 4, SImode, qualifier_poly)
VARIANTS (s32, 2, 4, SImode, qualifier_none)
VARIANTS (u32, 2, 4, SImode, qualifier_unsigned)

VARIANTS (f64, 1, 2, DFmode, qualifier_none)
VARIANTS (p64, 1, 2, DImode, qualifier_poly)
VARIANTS (s64, 1, 2, DImode, qualifier_none)
VARIANTS (u64, 1, 2, DImode, qualifier_unsigned)

constexpr simd_type none { VOIDmode, qualifier_none };

#undef VARIANTS

}
#define ENTRY(N, S, T0, T1, T2, T3, U, F) \
  {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
   simd_types::T2, simd_types::T3, U, \
   aarch64_required_extensions::REQUIRED_EXTENSIONS, FLAG_##F},

/* Initialize pragma builtins.  */

struct aarch64_pragma_builtins_data
{
  const char *name;
  aarch64_builtin_signatures signature;
  simd_type types[4];
  int unspec;
  aarch64_required_extensions required_extensions;
  unsigned int flags;
};

static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
#include "aarch64-simd-pragma-builtins.def"
};
/* Return the function type for BUILTIN_DATA.  */
static tree
aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
{
  tree return_type = NULL_TREE;
  auto_vec<tree, 8> arg_types;
  switch (builtin_data.signature)
    {
    case aarch64_builtin_signatures::binary:
    case aarch64_builtin_signatures::binary_lane:
    case aarch64_builtin_signatures::load_lane:
      return_type = builtin_data.types[0].type ();
      for (int i = 1; i <= 2; ++i)
	arg_types.quick_push (builtin_data.types[i].type ());
      break;

    case aarch64_builtin_signatures::binary_two_lanes:
      /* binary_two_lanes has to be handled as a special case because indices
	 interleave vectors.  */
      return_type = builtin_data.types[0].type ();
      arg_types.quick_push (builtin_data.types[1].type ());
      arg_types.quick_push (integer_type_node);
      arg_types.quick_push (builtin_data.types[2].type ());
      arg_types.quick_push (integer_type_node);
      break;

    case aarch64_builtin_signatures::load:
    case aarch64_builtin_signatures::unary:
    case aarch64_builtin_signatures::unary_lane:
      return_type = builtin_data.types[0].type ();
      arg_types.quick_push (builtin_data.types[1].type ());
      break;

    case aarch64_builtin_signatures::store:
    case aarch64_builtin_signatures::store_lane:
      return_type = void_type_node;
      for (int i = 0; i <= 1; ++i)
	arg_types.quick_push (builtin_data.types[i].type ());
      break;

    case aarch64_builtin_signatures::ternary:
    case aarch64_builtin_signatures::ternary_lane:
      return_type = builtin_data.types[0].type ();
      for (int i = 1; i <= 3; ++i)
	arg_types.quick_push (builtin_data.types[i].type ());
      break;
    }

  switch (builtin_data.signature)
    {
    case aarch64_builtin_signatures::binary_lane:
    case aarch64_builtin_signatures::load_lane:
    case aarch64_builtin_signatures::store_lane:
    case aarch64_builtin_signatures::ternary_lane:
    case aarch64_builtin_signatures::unary_lane:
      arg_types.quick_push (integer_type_node);
      break;

    default:
      break;
    }

  if (builtin_data.flags & FLAG_USES_FPMR)
    arg_types.quick_push (uint64_type_node);

  return build_function_type_array (return_type, arg_types.length (),
				    arg_types.address ());
}
/* Simulate function definitions for all of the builtins in
   aarch64_pragma_builtins.  */
static void
aarch64_init_pragma_builtins ()
{
  for (size_t i = 0; i < ARRAY_SIZE (aarch64_pragma_builtins); ++i)
    {
      auto data = aarch64_pragma_builtins[i];
      auto fntype = aarch64_fntype (data);
      auto code = AARCH64_PRAGMA_BUILTIN_START + i + 1;
      auto flag_mode = data.types[0].mode;
      auto attrs = aarch64_get_attributes (data.flags, flag_mode);
      aarch64_builtin_decls[code]
	= aarch64_general_simulate_builtin (data.name, fntype, code, attrs);
    }
}
/* If the builtin function with code CODE has an entry in
   aarch64_pragma_builtins, return its entry, otherwise return null.  */
static const aarch64_pragma_builtins_data *
aarch64_get_pragma_builtin (int code)
{
  if (!(code > AARCH64_PRAGMA_BUILTIN_START
	&& code < AARCH64_PRAGMA_BUILTIN_END))
    return nullptr;

  auto idx = code - (AARCH64_PRAGMA_BUILTIN_START + 1);
  return &aarch64_pragma_builtins[idx];
}
/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
   indexed by TYPE_INDEX.  */
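/* As a worked example of the name synthesis below: assuming the vector type
   at TYPE_INDEX is named "__Int8x8_t" (that internal AdvSIMD spelling is an
   assumption here, not taken from this function), NUM_VECTORS == 2 strips
   the leading "__" and trailing "_t", appends "x2_t" and lower-cases the
   first letter, giving the user-visible tuple type name "int8x8x2_t".  */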
static void
register_tuple_type (unsigned int num_vectors, unsigned int type_index)
{
  const aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
  aarch64_simd_type_info_trees *trees = &aarch64_simd_types_trees[type_index];

  /* Synthesize the name of the user-visible vector tuple type.  */
  const char *vector_type_name = type->name;
  char tuple_type_name[sizeof ("bfloat16x4x2_t")];
  snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
	    (int) strlen (vector_type_name) - 4, vector_type_name + 2,
	    num_vectors);
  tuple_type_name[0] = TOLOWER (tuple_type_name[0]);

  tree vector_type = trees->itype;
  tree array_type = build_array_type_nelts (vector_type, num_vectors);
  if (type->mode == DImode)
    {
      if (num_vectors == 2)
	SET_TYPE_MODE (array_type, V2x1DImode);
      else if (num_vectors == 3)
	SET_TYPE_MODE (array_type, V3x1DImode);
      else if (num_vectors == 4)
	SET_TYPE_MODE (array_type, V4x1DImode);
    }

  unsigned int alignment
    = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
  machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
  gcc_assert (VECTOR_MODE_P (tuple_mode)
	      && TYPE_MODE (array_type) == tuple_mode
	      && TYPE_ALIGN (array_type) == alignment);

  tree field = build_decl (input_location, FIELD_DECL,
			   get_identifier ("val"), array_type);

  tree t = lang_hooks.types.simulate_record_decl (input_location,
						  tuple_type_name,
						  make_array_slice (&field,
								    1));
  gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
	      && (flag_pack_struct
		  || maximum_field_alignment
		  || (TYPE_MODE_RAW (t) == tuple_mode
		      && TYPE_ALIGN (t) == alignment)));

  aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
  aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
}
static bool
aarch64_scalar_builtin_type_p (aarch64_simd_type t)
{
  return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
}
/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
   set.  */
aarch64_simd_switcher::aarch64_simd_switcher (aarch64_feature_flags extra_flags)
  : m_old_asm_isa_flags (aarch64_asm_isa_flags),
    m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
{
  /* Changing the ISA flags should be enough here.  We shouldn't need to
     pay the compile-time cost of a full target switch.  */
  global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
  aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
}

aarch64_simd_switcher::~aarch64_simd_switcher ()
{
  if (m_old_general_regs_only)
    global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
  aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
}
/* Implement #pragma GCC aarch64 "arm_neon.h".

   The types and functions defined here need to be available internally
   during LTO as well.  */
void
handle_arm_neon_h (void)
{
  aarch64_simd_switcher simd;

  /* Register the AdvSIMD vector tuple types.  */
  for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
    for (unsigned int count = 2; count <= 4; ++count)
      if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
	register_tuple_type (count, i);

  aarch64_init_simd_builtin_functions (true);
  aarch64_init_simd_intrinsics ();
  aarch64_init_pragma_builtins ();
}
static void
aarch64_init_simd_builtins (void)
{
  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  It can be
     removed once all the intrinsics become strongly typed using the qualifier
     system.  */
  aarch64_init_simd_builtin_scalar_types ();

  aarch64_init_simd_builtin_functions (false);
  if (in_lto_p)
    handle_arm_neon_h ();

  /* Initialize the remaining fcmla_laneq intrinsics.  */
  aarch64_init_fcmla_laneq_builtins ();
}
static void
aarch64_init_crc32_builtins ()
{
  tree usi_type = aarch64_simd_builtin_type (SImode, qualifier_unsigned);
  unsigned int i;

  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
    {
      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
      tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_unsigned);
      tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_DEFAULT, d->mode);
      tree fndecl
	= aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}
/* Add builtins for reciprocal square root.  */
static void
aarch64_init_builtin_rsqrt (void)
{
  tree fndecl = NULL;
  tree ftype = NULL;

  tree V2SF_type_node = build_vector_type (float_type_node, 2);
  tree V2DF_type_node = build_vector_type (double_type_node, 2);
  tree V4SF_type_node = build_vector_type (float_type_node, 4);

  struct builtin_decls_data
  {
    tree type_node;
    const char *builtin_name;
    int function_code;
  };

  builtin_decls_data bdda[] =
  {
    { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
    { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
    { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
    { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
    { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
  };

  builtin_decls_data *bdd = bdda;
  builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));

  for (; bdd < bdd_end; bdd++)
    {
      ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
      tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
      fndecl = aarch64_general_add_builtin (bdd->builtin_name,
					    ftype, bdd->function_code, attrs);
      aarch64_builtin_decls[bdd->function_code] = fndecl;
    }
}
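
/* A minimal usage sketch for one of the declarations registered above,
   written as it might appear in user code (illustrative only):

     double x = 4.0;
     double r = __builtin_aarch64_rsqrt_df (x);

   r approximates 1 / sqrt (x), so roughly 0.5 here.  */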
/* Initialize the backend type that supports the user-visible __mfp8
   type and its related pointer type.  */
static void
aarch64_init_fp8_types (void)
{
  aarch64_mfp8_type_node = make_unsigned_type (8);
  SET_TYPE_MODE (aarch64_mfp8_type_node, QImode);

  lang_hooks.types.register_builtin_type (aarch64_mfp8_type_node, "__mfp8");
  aarch64_mfp8_ptr_type_node = build_pointer_type (aarch64_mfp8_type_node);
}
/* Initialize the backend types that support the user-visible __fp16
   type, also initialize a pointer to that type, to be used when
   passing and returning it.  */
static void
aarch64_init_fp16_types (void)
{
  aarch64_fp16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
  layout_type (aarch64_fp16_type_node);

  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}
/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
static void
aarch64_init_bf16_types (void)
{
  lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
  aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
}
/* Pointer authentication builtins that will become NOP on legacy platforms.
   Currently, these builtins are for internal use only (libgcc EH unwinder).  */
static void
aarch64_init_pauth_hint_builtins (void)
{
  /* Pointer Authentication builtins.  */
  tree ftype_pointer_auth
    = build_function_type_list (ptr_type_node, ptr_type_node,
				unsigned_intDI_type_node, NULL_TREE);
  tree ftype_pointer_strip
    = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);

  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_AUTIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_PACIA1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_AUTIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
    = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
				   ftype_pointer_auth,
				   AARCH64_PAUTH_BUILTIN_PACIB1716);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
    = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
				   ftype_pointer_strip,
				   AARCH64_PAUTH_BUILTIN_XPACLRI);
}
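
/* A usage sketch for the declarations above, matching the ptr (ptr, uint64)
   signature of ftype_pointer_auth (illustrative only; as noted above, these
   builtins are intended for the libgcc unwinder):

     void *signed_ra = __builtin_aarch64_pacia1716 (ra, (uint64_t) sp);
     void *plain_ra = __builtin_aarch64_autia1716 (signed_ra, (uint64_t) sp);

   where ra and sp are hypothetical return-address and stack-pointer values
   supplied by the caller.  */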
/* Initialize the transactional memory extension (TME) builtins.  */
static void
aarch64_init_tme_builtins (void)
{
  tree ftype_uint64_void
    = build_function_type_list (uint64_type_node, NULL);
  tree ftype_void_void
    = build_function_type_list (void_type_node, NULL);
  tree ftype_void_uint64
    = build_function_type_list (void_type_node, uint64_type_node, NULL);

  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
    = aarch64_general_simulate_builtin ("__tstart", ftype_uint64_void,
					AARCH64_TME_BUILTIN_TSTART);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
    = aarch64_general_simulate_builtin ("__ttest", ftype_uint64_void,
					AARCH64_TME_BUILTIN_TTEST);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
    = aarch64_general_simulate_builtin ("__tcommit", ftype_void_void,
					AARCH64_TME_BUILTIN_TCOMMIT);
  aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
    = aarch64_general_simulate_builtin ("__tcancel", ftype_void_uint64,
					AARCH64_TME_BUILTIN_TCANCEL);
}
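
/* A sketch of how the simulated functions above are expected to be used
   (illustrative only; the names and types are exactly those registered
   above, the TME semantics assumed are that __tstart returns zero when the
   transaction starts and that __tcancel takes a 16-bit immediate):

     uint64_t status = __tstart ();
     if (status == 0)
       {
	 ... transactional region ...
	 __tcommit ();
       }
*/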
/* Add builtins for Random Number instructions.  */
static void
aarch64_init_rng_builtins (void)
{
  tree unsigned_ptr_type
    = build_pointer_type (get_typenode_from_name (UINT64_TYPE));
  tree ftype
    = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
				   AARCH64_BUILTIN_RNG_RNDR);
  aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
    = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
				   AARCH64_BUILTIN_RNG_RNDRRS);
}
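
/* Usage sketch (illustrative only), following the int (uint64_t *) type
   built above:

     uint64_t value;
     int status = __builtin_aarch64_rndr (&value);

   The int return value reports the status of the operation; see
   aarch64_expand_rng_builtin further down, which emits a CSET on the
   condition flags to produce it.  */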
/* Add builtins for reading and writing system registers.  */
static void
aarch64_init_rwsr_builtins (void)
{
  tree fntype = NULL;
  tree const_char_ptr_type
    = build_pointer_type (build_type_variant (char_type_node, true, false));

#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
  aarch64_builtin_decls[AARCH64_##F] \
    = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);

  fntype
    = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);

  fntype
    = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);

  fntype
    = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);

  fntype
    = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);

  fntype
    = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);

  fntype
    = build_function_type_list (uint128_type_node, const_char_ptr_type, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (RSR128, rsr128, fntype);

  fntype
    = build_function_type_list (void_type_node, const_char_ptr_type,
				uint32_type_node, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);

  fntype
    = build_function_type_list (void_type_node, const_char_ptr_type,
				const_ptr_type_node, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);

  fntype
    = build_function_type_list (void_type_node, const_char_ptr_type,
				uint64_type_node, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);

  fntype
    = build_function_type_list (void_type_node, const_char_ptr_type,
				float_type_node, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);

  fntype
    = build_function_type_list (void_type_node, const_char_ptr_type,
				double_type_node, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);

  fntype
    = build_function_type_list (void_type_node, const_char_ptr_type,
				uint128_type_node, NULL);
  AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype);
}
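
/* Usage sketch (illustrative only; the register name below is merely an
   example and, as enforced in aarch64_expand_rwsr_builtin further down,
   must be given as a string literal):

     uint64_t old = __builtin_aarch64_rsr64 ("tpidr_el0");
     __builtin_aarch64_wsr64 ("tpidr_el0", old + 1);
*/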
/* Add builtins for data and instruction prefetch.  */
static void
aarch64_init_prefetch_builtin (void)
{
#define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N)				\
  aarch64_builtin_decls[INDEX] =					\
    aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX, \
				 prefetch_attrs)

  tree ftype;
  tree cv_argtype;
  tree prefetch_attrs = aarch64_get_attributes (FLAG_PREFETCH_MEMORY, DImode);
  cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
				     | TYPE_QUAL_VOLATILE);
  cv_argtype = build_pointer_type (cv_argtype);

  ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");

  ftype = build_function_type_list (void_type_node, unsigned_type_node,
				    unsigned_type_node, unsigned_type_node,
				    cv_argtype, NULL);
  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");

  ftype = build_function_type_list (void_type_node, unsigned_type_node,
				    unsigned_type_node, cv_argtype, NULL);
  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
}
/* Initialize the memory tagging extension (MTE) builtins.  */
static GTY(()) struct GTY(())
{
  tree ftype;
  enum insn_code icode;
} aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
			      AARCH64_MEMTAG_BUILTIN_START - 1];

static void
aarch64_init_memtag_builtins (void)
{
  tree fntype;

#define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
  aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
    = aarch64_general_simulate_builtin ("__arm_mte_"#N, T, \
					AARCH64_MEMTAG_BUILTIN_##F); \
  aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
			      AARCH64_MEMTAG_BUILTIN_START - 1] = \
    {T, CODE_FOR_##I};

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
				     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, create_random_tag, irg, fntype);

  fntype = build_function_type_list (uint64_type_node, ptr_type_node,
				     uint64_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, exclude_tag, gmi, fntype);

  fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
				     ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, ptrdiff, subp, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node,
				     unsigned_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, increment_tag, addg, fntype);

  fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);

  fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
  AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);

#undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
}
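
/* Usage sketch (illustrative only), following the function types registered
   above; p is a hypothetical pointer into a tagged allocation:

     void *tagged = __arm_mte_create_random_tag (p, 0);
     __arm_mte_set_tag (tagged);
     void *checked = __arm_mte_get_tag (tagged);
*/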
/* Add builtins for Load/store 64 Byte instructions.  */
typedef struct
{
  const char *name;
  unsigned int code;
  tree type;
} ls64_builtins_data;

static GTY(()) tree ls64_arm_data_t = NULL_TREE;

static void
aarch64_init_ls64_builtins_types (void)
{
  /* Synthesize:

     typedef struct {
       uint64_t val[8];
     } __arm_data512_t;  */
  const char *tuple_type_name = "__arm_data512_t";
  tree node_type = get_typenode_from_name (UINT64_TYPE);
  tree array_type = build_array_type_nelts (node_type, 8);
  SET_TYPE_MODE (array_type, V8DImode);

  gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
  gcc_assert (TYPE_ALIGN (array_type) == 64);

  tree field = build_decl (input_location, FIELD_DECL,
			   get_identifier ("val"), array_type);

  ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
				     tuple_type_name,
				     make_array_slice (&field, 1));

  gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
  gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
  gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
}

static void
aarch64_init_ls64_builtins (void)
{
  aarch64_init_ls64_builtins_types ();

  ls64_builtins_data data[4] = {
    {"__arm_ld64b", AARCH64_LS64_BUILTIN_LD64B,
     build_function_type_list (ls64_arm_data_t,
			       const_ptr_type_node, NULL_TREE)},
    {"__arm_st64b", AARCH64_LS64_BUILTIN_ST64B,
     build_function_type_list (void_type_node, ptr_type_node,
			       ls64_arm_data_t, NULL_TREE)},
    {"__arm_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
     build_function_type_list (uint64_type_node, ptr_type_node,
			       ls64_arm_data_t, NULL_TREE)},
    {"__arm_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
     build_function_type_list (uint64_type_node, ptr_type_node,
			       ls64_arm_data_t, NULL_TREE)},
  };

  for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
    aarch64_builtin_decls[data[i].code]
      = aarch64_general_simulate_builtin (data[i].name, data[i].type,
					  data[i].code);
}
static void
aarch64_init_data_intrinsics (void)
{
  /* These intrinsics are not fp, nor do they read/write memory.  */
  tree attrs = aarch64_get_attributes (FLAG_DEFAULT, SImode);
  tree uint32_fntype = build_function_type_list (uint32_type_node,
						 uint32_type_node, NULL_TREE);
  tree ulong_fntype = build_function_type_list (long_unsigned_type_node,
						long_unsigned_type_node,
						NULL_TREE);
  tree uint64_fntype = build_function_type_list (uint64_type_node,
						 uint64_type_node, NULL_TREE);
  aarch64_builtin_decls[AARCH64_REV16]
    = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
				   AARCH64_REV16, attrs);
  aarch64_builtin_decls[AARCH64_REV16L]
    = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype,
				   AARCH64_REV16L, attrs);
  aarch64_builtin_decls[AARCH64_REV16LL]
    = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype,
				   AARCH64_REV16LL, attrs);
  aarch64_builtin_decls[AARCH64_RBIT]
    = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
				   AARCH64_RBIT, attrs);
  aarch64_builtin_decls[AARCH64_RBITL]
    = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype,
				   AARCH64_RBITL, attrs);
  aarch64_builtin_decls[AARCH64_RBITLL]
    = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
				   AARCH64_RBITLL, attrs);
}
/* Implement #pragma GCC aarch64 "arm_acle.h".  */
void
handle_arm_acle_h (void)
{
  aarch64_init_ls64_builtins ();
  aarch64_init_tme_builtins ();
  aarch64_init_memtag_builtins ();
}
/* Initialize the FPSR and FPCR getters and setters.  */
static void
aarch64_init_fpsr_fpcr_builtins (void)
{
  tree ftype_set
    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
  tree ftype_get
    = build_function_type_list (unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPCR);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPSR);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPSR);

  ftype_set
    = build_function_type_list (void_type_node, long_long_unsigned_type_node,
				NULL);
  ftype_get
    = build_function_type_list (long_long_unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPCR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPCR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
				   ftype_get,
				   AARCH64_BUILTIN_GET_FPSR64);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
    = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
				   ftype_set,
				   AARCH64_BUILTIN_SET_FPSR64);
}
/* Add builtins for Guarded Control Stack instructions.  */
static void
aarch64_init_gcs_builtins (void)
{
  tree ftype;

  ftype = build_function_type_list (ptr_type_node, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_GCSPR]
    = aarch64_general_add_builtin ("__builtin_aarch64_gcspr", ftype,
				   AARCH64_BUILTIN_GCSPR);

  ftype = build_function_type_list (uint64_type_node, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_GCSPOPM]
    = aarch64_general_add_builtin ("__builtin_aarch64_gcspopm", ftype,
				   AARCH64_BUILTIN_GCSPOPM);

  ftype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_GCSSS]
    = aarch64_general_add_builtin ("__builtin_aarch64_gcsss", ftype,
				   AARCH64_BUILTIN_GCSSS);
}
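
/* Usage sketch (illustrative only), following the types built above:

     void *gcs_pointer = __builtin_aarch64_gcspr ();
     uint64_t popped = __builtin_aarch64_gcspopm ();
*/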
/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group.  */
void
aarch64_general_init_builtins (void)
{
  aarch64_init_fpsr_fpcr_builtins ();

  aarch64_init_fp8_types ();

  aarch64_init_fp16_types ();

  aarch64_init_bf16_types ();

  {
    aarch64_simd_switcher simd;
    aarch64_init_simd_builtins ();
  }

  aarch64_init_crc32_builtins ();
  aarch64_init_builtin_rsqrt ();
  aarch64_init_rng_builtins ();
  aarch64_init_data_intrinsics ();

  aarch64_init_rwsr_builtins ();
  aarch64_init_prefetch_builtin ();

  tree ftype_jcvt
    = build_function_type_list (intSI_type_node, double_type_node, NULL);
  aarch64_builtin_decls[AARCH64_JSCVT]
    = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
				   AARCH64_JSCVT);

  /* Initialize pointer authentication builtins which are backed by instructions
     in NOP encoding space.

     NOTE: these builtins are supposed to be used by libgcc unwinder only, as
     there is no support on return address signing under ILP32, we don't
     register them.  */
  if (!TARGET_ILP32)
    aarch64_init_pauth_hint_builtins ();

  tree ftype_chkfeat
    = build_function_type_list (uint64_type_node, uint64_type_node, NULL);
  aarch64_builtin_decls[AARCH64_BUILTIN_CHKFEAT]
    = aarch64_general_add_builtin ("__builtin_aarch64_chkfeat", ftype_chkfeat,
				   AARCH64_BUILTIN_CHKFEAT);

  aarch64_init_gcs_builtins ();

  if (in_lto_p)
    handle_arm_acle_h ();
}
/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group.  */
tree
aarch64_general_builtin_decl (unsigned code, bool)
{
  if (code >= AARCH64_BUILTIN_MAX)
    return error_mark_node;

  return aarch64_builtin_decls[code];
}
/* True if we've already complained about attempts to use functions
   when the required extension is disabled.  */
static bool reported_missing_extension_p;

/* True if we've already complained about attempts to use functions
   which require registers that are missing.  */
static bool reported_missing_registers_p;
/* Report an error against LOCATION that the user has tried to use
   function FNDECL when extension EXTENSION is disabled.  */
static void
aarch64_report_missing_extension (location_t location, tree fndecl,
				  const char *extension)
{
  /* Avoid reporting a slew of messages for a single oversight.  */
  if (reported_missing_extension_p)
    return;

  error_at (location, "ACLE function %qD requires ISA extension %qs",
	    fndecl, extension);
  inform (location, "you can enable %qs using the command-line"
	  " option %<-march%>, or by using the %<target%>"
	  " attribute or pragma", extension);
  reported_missing_extension_p = true;
}
/* Report an error against LOCATION that the user has tried to use
   function FNDECL when non-general registers are disabled.  */
static void
aarch64_report_missing_registers (location_t location, tree fndecl)
{
  /* Avoid reporting a slew of messages for a single oversight.  */
  if (reported_missing_registers_p)
    return;

  error_at (location,
	    "ACLE function %qD is incompatible with the use of %qs",
	    fndecl, "-mgeneral-regs-only");
  reported_missing_registers_p = true;
}
/* Check whether the requirements in REQUIRED_EXTENSIONS are met, given that
   those requirements come from calling function FNDECL.  Report an error
   against LOCATION if not.  */
bool
aarch64_check_required_extensions (location_t location, tree fndecl,
				   aarch64_required_extensions
				     required_extensions)
{
  aarch64_feature_flags sm_state_extensions = 0;
  if (!TARGET_STREAMING)
    {
      if (required_extensions.sm_off == 0)
	{
	  error_at (location, "ACLE function %qD can only be called when"
		    " SME streaming mode is enabled", fndecl);
	  return false;
	}
      sm_state_extensions |= required_extensions.sm_off & ~AARCH64_FL_SM_OFF;
    }
  if (!TARGET_NON_STREAMING)
    {
      if (required_extensions.sm_on == 0)
	{
	  error_at (location, "ACLE function %qD cannot be called when"
		    " SME streaming mode is enabled", fndecl);
	  return false;
	}
      sm_state_extensions |= required_extensions.sm_on & ~AARCH64_FL_SM_ON;
    }

  if ((sm_state_extensions & ~aarch64_isa_flags) == 0)
    return true;

  auto missing_extensions = sm_state_extensions & ~aarch64_asm_isa_flags;
  if (missing_extensions == 0)
    {
      /* All required extensions are enabled in aarch64_asm_isa_flags, so the
	 error must be the use of general-regs-only.  */
      aarch64_report_missing_registers (location, fndecl);
      return false;
    }

  if (missing_extensions & AARCH64_FL_ZA_ON)
    {
      error_at (location, "ACLE function %qD can only be called from"
		" a function that has %qs state", fndecl, "za");
      return false;
    }

  static const struct {
    aarch64_feature_flags flag;
    const char *name;
  } extensions[] = {
#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
    { AARCH64_FL_##IDENT, EXT_NAME },
#include "aarch64-option-extensions.def"
  };

  for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i)
    if (missing_extensions & extensions[i].flag)
      {
	aarch64_report_missing_extension (location, fndecl, extensions[i].name);
	return false;
      }
  gcc_unreachable ();
}
/* Return the ISA extensions required by function CODE.  */
static aarch64_required_extensions
aarch64_general_required_extensions (unsigned int code)
{
  using ext = aarch64_required_extensions;
  switch (code)
    {
    case AARCH64_TME_BUILTIN_TSTART:
    case AARCH64_TME_BUILTIN_TCOMMIT:
    case AARCH64_TME_BUILTIN_TTEST:
    case AARCH64_TME_BUILTIN_TCANCEL:
      return ext::streaming_compatible (AARCH64_FL_TME);

    case AARCH64_LS64_BUILTIN_LD64B:
    case AARCH64_LS64_BUILTIN_ST64B:
    case AARCH64_LS64_BUILTIN_ST64BV:
    case AARCH64_LS64_BUILTIN_ST64BV0:
      return ext::streaming_compatible (AARCH64_FL_LS64);

    default:
      break;
    }

  if (code >= AARCH64_MEMTAG_BUILTIN_START
      && code <= AARCH64_MEMTAG_BUILTIN_END)
    return ext::streaming_compatible (AARCH64_FL_MEMTAG);

  if (auto builtin_data = aarch64_get_pragma_builtin (code))
    return builtin_data->required_extensions;

  return ext::streaming_compatible (0);
}
/* Checks calls to intrinsics that are defined using
   aarch64-simd-pragma-builtins.def.  */
struct aarch64_pragma_builtins_checker
{
  aarch64_pragma_builtins_checker (location_t, tree, unsigned int, tree *,
				   const aarch64_pragma_builtins_data &);

  bool require_immediate_range (unsigned int, HOST_WIDE_INT,
				HOST_WIDE_INT);
  bool require_immediate_lane_index (unsigned int, unsigned int, unsigned int);
  bool require_immediate_lane_index (unsigned int, unsigned int);

  bool check ();

  location_t location;
  tree fndecl;
  unsigned int nargs;
  array_slice<tree> args;
  const aarch64_pragma_builtins_data &builtin_data;
};

/* LOCATION is the location of the call; FNDECL is the FUNCTION_DECL
   that is being called; NARGS is the number of arguments to the call,
   which are in a vector starting at FIRST_ARG; and BUILTIN_DATA describes
   the intrinsic.  */
aarch64_pragma_builtins_checker::
aarch64_pragma_builtins_checker (location_t location, tree fndecl,
				 unsigned int nargs, tree *first_arg,
				 const aarch64_pragma_builtins_data
				   &builtin_data)
  : location (location), fndecl (fndecl), nargs (nargs),
    args (first_arg, nargs), builtin_data (builtin_data)
{
}
/* Require argument ARGNO to be an integer constant expression in the
   range [MIN, MAX].  Return true if it was.  */
bool
aarch64_pragma_builtins_checker::
require_immediate_range (unsigned int argno, HOST_WIDE_INT min,
			 HOST_WIDE_INT max)
{
  if (!tree_fits_shwi_p (args[argno]))
    {
      report_non_ice (location, fndecl, argno);
      return false;
    }

  HOST_WIDE_INT actual = tree_to_shwi (args[argno]);
  if (actual < min || actual > max)
    {
      report_out_of_range (location, fndecl, argno, actual, min, max);
      return false;
    }

  return true;
}
/* Require argument LANE_ARGNO to be an immediate lane index into vector
   argument VEC_ARGNO, given that each index selects enough data to fill
   one element of argument ELT_ARGNO.  Return true if the argument
   is valid.  */
bool
aarch64_pragma_builtins_checker::
require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno,
			      unsigned int elt_argno)
{
  auto vec_mode = TYPE_MODE (TREE_TYPE (args[vec_argno]));
  auto elt_mode = TYPE_MODE (TREE_TYPE (args[elt_argno]));
  auto nunits = (aarch64_num_lanes (vec_mode)
		 * GET_MODE_UNIT_SIZE (vec_mode)
		 / GET_MODE_UNIT_SIZE (elt_mode));
  return require_immediate_range (lane_argno, 0, nunits - 1);
}

/* Require argument LANE_ARGNO to be an immediate lane index that selects
   one element of argument VEC_ARGNO.  Return true if the argument
   is valid.  */
bool
aarch64_pragma_builtins_checker::
require_immediate_lane_index (unsigned int lane_argno, unsigned int vec_argno)
{
  return require_immediate_lane_index (lane_argno, vec_argno, vec_argno);
}
/* Check the arguments to the intrinsic call and return true if they
   are valid.  */
bool
aarch64_pragma_builtins_checker::check ()
{
  auto &types = builtin_data.types;

  switch (builtin_data.unspec)
    {
    case UNSPEC_DUP_LANE:
    case UNSPEC_GET_LANE:
    case UNSPEC_LD2_LANE:
    case UNSPEC_LD3_LANE:
    case UNSPEC_LD4_LANE:
    case UNSPEC_SET_LANE:
    case UNSPEC_ST1_LANE:
    case UNSPEC_ST2_LANE:
    case UNSPEC_ST3_LANE:
    case UNSPEC_ST4_LANE:
      return require_immediate_lane_index (nargs - 1, nargs - 2);

    case UNSPEC_EXT:
      return require_immediate_range (2, 0, types[2].nunits () - 1);

    case UNSPEC_FDOT_LANE_FP8:
      return require_immediate_lane_index (nargs - 2, nargs - 3, 0);

    case UNSPEC_FMLALB_FP8:
    case UNSPEC_FMLALT_FP8:
    case UNSPEC_FMLALLBB_FP8:
    case UNSPEC_FMLALLBT_FP8:
    case UNSPEC_FMLALLTB_FP8:
    case UNSPEC_FMLALLTT_FP8:
      if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
	return require_immediate_lane_index (nargs - 2, nargs - 3);
      else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
	return true;
      else
	gcc_unreachable ();

    case UNSPEC_LUTI2:
    case UNSPEC_LUTI4:
      {
	auto vector_to_index_nunits = types[nargs - 1].nunits ();
	int output_mode_nunits = types[0].nunits ();

	int high;
	if (builtin_data.unspec == UNSPEC_LUTI2)
	  high = (4 * vector_to_index_nunits / output_mode_nunits) - 1;
	else
	  high = (2 * vector_to_index_nunits / output_mode_nunits) - 1;

	return require_immediate_range (nargs - 1, 0, high);
      }

    case UNSPEC_VEC_COPY:
      /* & rather than && so that we report errors against both indices.  */
      return (require_immediate_lane_index (1, 0)
	      & require_immediate_lane_index (3, 2));

    default:
      return true;
    }
}
bool
aarch64_general_check_builtin_call (location_t location, vec<location_t>,
				    unsigned int code, tree fndecl,
				    unsigned int nargs, tree *args)
{
  tree decl = aarch64_builtin_decls[code];
  auto required_extensions = aarch64_general_required_extensions (code);
  if (!aarch64_check_required_extensions (location, decl, required_extensions))
    return false;

  if (auto builtin_data = aarch64_get_pragma_builtin (code))
    {
      aarch64_pragma_builtins_checker checker (location, fndecl, nargs, args,
					       *builtin_data);
      return checker.check ();
    }

  switch (code)
    {
    case AARCH64_RSR:
    case AARCH64_RSRP:
    case AARCH64_RSR64:
    case AARCH64_RSRF:
    case AARCH64_RSRF64:
    case AARCH64_WSR:
    case AARCH64_WSRP:
    case AARCH64_WSR64:
    case AARCH64_WSRF:
    case AARCH64_WSRF64:
      {
	tree addr = STRIP_NOPS (args[0]);
	if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
	    || TREE_CODE (addr) != ADDR_EXPR
	    || TREE_CODE (TREE_OPERAND (addr, 0)) != STRING_CST)
	  {
	    error_at (location,
		      "first argument to %qD must be a string literal",
		      fndecl);
	    return false;
	  }
      }
      break;
    }

  /* Default behavior.  */
  return true;
}

typedef enum
{
  SIMD_ARG_COPY_TO_REG,
  SIMD_ARG_CONSTANT,
  SIMD_ARG_LANE_INDEX,
  SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
  SIMD_ARG_LANE_PAIR_INDEX,
  SIMD_ARG_LANE_QUADTUP_INDEX,
  SIMD_ARG_STOP
} builtin_simd_arg;
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
			  tree exp, builtin_simd_arg *args,
			  machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1];  /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
	break;
      else
	{
	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
	  machine_mode mode = insn_data[icode].operand[opc].mode;
	  op[opc] = expand_normal (arg);

	  switch (thisarg)
	    {
	    case SIMD_ARG_COPY_TO_REG:
	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
		op[opc] = convert_memory_address (Pmode, op[opc]);
	      /*gcc_assert (GET_MODE (op[opc]) == mode); */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		op[opc] = copy_to_mode_reg (mode, op[opc]);
	      break;

	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
	      gcc_assert (opc > 1);
	      if (CONST_INT_P (op[opc]))
		{
		  unsigned int nunits
		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
						     INTVAL (op[opc]));
		}
	      goto constant_arg;

	    case SIMD_ARG_LANE_INDEX:
	      /* Must be a previous operand into which this is an index.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_PAIR_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 2.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_QUADTUP_INDEX:
	      /* Must be a previous operand into which this is an index and
		 index is restricted to nunits / 4.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_CONSTANT:
	    constant_arg:
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		{
		  error_at (EXPR_LOCATION (exp),
			    "argument %d must be a constant immediate",
			    opc + 1 - have_retval);
		  return const0_rtx;
		}
	      break;

	    case SIMD_ARG_STOP:
	      gcc_unreachable ();
	    }

	  opc++;
	}
    }

  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}
3009 aarch64_simd_expand_builtin (int fcode
, tree exp
, rtx target
)
3011 if (fcode
== AARCH64_SIMD_BUILTIN_LANE_CHECK
)
3013 rtx totalsize
= expand_normal (CALL_EXPR_ARG (exp
, 0));
3014 rtx elementsize
= expand_normal (CALL_EXPR_ARG (exp
, 1));
3015 if (CONST_INT_P (totalsize
) && CONST_INT_P (elementsize
)
3016 && UINTVAL (elementsize
) != 0
3017 && UINTVAL (totalsize
) != 0)
3019 rtx lane_idx
= expand_normal (CALL_EXPR_ARG (exp
, 2));
3020 if (CONST_INT_P (lane_idx
))
3021 aarch64_simd_lane_bounds (lane_idx
, 0,
3023 / UINTVAL (elementsize
),
3026 error_at (EXPR_LOCATION (exp
),
3027 "lane index must be a constant immediate");
3030 error_at (EXPR_LOCATION (exp
),
3031 "total size and element size must be a nonzero "
3032 "constant immediate");
3033 /* Don't generate any RTL. */
3036 aarch64_simd_builtin_datum
*d
=
3037 &aarch64_simd_builtin_data
[fcode
- AARCH64_SIMD_PATTERN_START
];
3038 enum insn_code icode
= d
->code
;
3039 builtin_simd_arg args
[SIMD_MAX_BUILTIN_ARGS
+ 1];
3040 int num_args
= insn_data
[d
->code
].n_operands
;
3044 is_void
= !!(d
->qualifiers
[0] & qualifier_void
);
3046 num_args
+= is_void
;
3048 for (k
= 1; k
< num_args
; k
++)
3050 /* We have four arrays of data, each indexed in a different fashion.
3051 qualifiers - element 0 always describes the function return type.
3052 operands - element 0 is either the operand for return value (if
3053 the function has a non-void return type) or the operand for the
3055 expr_args - element 0 always holds the first argument.
3056 args - element 0 is always used for the return type. */
3057 int qualifiers_k
= k
;
3058 int operands_k
= k
- is_void
;
3059 int expr_args_k
= k
- 1;
3061 if (d
->qualifiers
[qualifiers_k
] & qualifier_lane_index
)
3062 args
[k
] = SIMD_ARG_LANE_INDEX
;
3063 else if (d
->qualifiers
[qualifiers_k
] & qualifier_lane_pair_index
)
3064 args
[k
] = SIMD_ARG_LANE_PAIR_INDEX
;
3065 else if (d
->qualifiers
[qualifiers_k
] & qualifier_lane_quadtup_index
)
3066 args
[k
] = SIMD_ARG_LANE_QUADTUP_INDEX
;
3067 else if (d
->qualifiers
[qualifiers_k
] & qualifier_struct_load_store_lane_index
)
3068 args
[k
] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX
;
3069 else if (d
->qualifiers
[qualifiers_k
] & qualifier_immediate
)
3070 args
[k
] = SIMD_ARG_CONSTANT
;
3071 else if (d
->qualifiers
[qualifiers_k
] & qualifier_maybe_immediate
)
3074 = expand_normal (CALL_EXPR_ARG (exp
,
3076 /* Handle constants only if the predicate allows it. */
3077 bool op_const_int_p
=
3079 && (*insn_data
[icode
].operand
[operands_k
].predicate
)
3080 (arg
, insn_data
[icode
].operand
[operands_k
].mode
));
3081 args
[k
] = op_const_int_p
? SIMD_ARG_CONSTANT
: SIMD_ARG_COPY_TO_REG
;
3084 args
[k
] = SIMD_ARG_COPY_TO_REG
;
3087 args
[k
] = SIMD_ARG_STOP
;
3089 /* The interface to aarch64_simd_expand_args expects a 0 if
3090 the function is void, and a 1 if it is not. */
3091 return aarch64_simd_expand_args
3092 (target
, icode
, !is_void
, exp
, &args
[1], d
->mode
);
static rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  rtx pat;
  aarch64_crc_builtin_datum *d
    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
  enum insn_code icode = d->icode;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
/* Function to expand reciprocal square root builtins.  */
static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);

  rtx (*gen) (rtx, rtx);

  switch (fcode)
    {
    case AARCH64_BUILTIN_RSQRT_DF:
      gen = gen_rsqrtdf2;
      break;
    case AARCH64_BUILTIN_RSQRT_SF:
      gen = gen_rsqrtsf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2DF:
      gen = gen_rsqrtv2df2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2SF:
      gen = gen_rsqrtv2sf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V4SF:
      gen = gen_rsqrtv4sf2;
      break;
    default: gcc_unreachable ();
    }

  if (!target)
    target = gen_reg_rtx (GET_MODE (op0));

  emit_insn (gen (target, op0));

  return target;
}
/* Expand a FCMLA lane expression EXP with code FCODE and
   result going to TARGET if that is convenient.  */
static rtx
aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
{
  int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
  aarch64_fcmla_laneq_builtin_datum *d
    = &aarch64_fcmla_lane_builtin_data[bcode];
  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
  tree tmp = CALL_EXPR_ARG (exp, 3);
  rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);

  /* Validate that the lane index is a constant.  */
  if (!CONST_INT_P (lane_idx))
    {
      error_at (EXPR_LOCATION (exp),
		"argument %d must be a constant immediate", 4);
      return const0_rtx;
    }

  /* Validate that the index is within the expected range.  */
  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);

  /* Generate the correct register and mode.  */
  int lane = INTVAL (lane_idx);

  if (lane < nunits / 4)
    op2 = force_lowpart_subreg (d->mode, op2, quadmode);
  else
    {
      /* Select the upper 64 bits, either a V2SF or V4HF.  This is quite
	 messy: the operation required, although simple, does not map onto
	 a single RTL pattern, and the generic gen_highpart_mode generates
	 code that isn't optimal.  */
      rtx temp1 = gen_reg_rtx (d->mode);
      rtx temp2 = gen_reg_rtx (DImode);
      temp1 = force_lowpart_subreg (d->mode, op2, quadmode);
      temp1 = force_subreg (V2DImode, temp1, d->mode, 0);
      if (BYTES_BIG_ENDIAN)
	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
      else
	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
      op2 = force_subreg (d->mode, temp2, GET_MODE (temp2), 0);

      /* And recalculate the index.  */
      lane -= nunits / 4;
    }

  /* Keep to GCC-vector-extension lane indices in the RTL, only nunits / 4
     (max nunits in range check) are valid.  Which means only 0-1, so we
     only need to know the order in a V2mode.  */
  lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);

  if (!target
      || !REG_P (target)
      || GET_MODE (target) != d->mode)
    target = gen_reg_rtx (d->mode);

  rtx pat;
  if (d->lane)
    pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
  else
    pat = GEN_FCN (d->icode) (target, op0, op1, op2);

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
/* Function to expand an expression EXP which calls one of the Transactional
   Memory Extension (TME) builtins FCODE with the result going to TARGET.  */
static rtx
aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
{
  switch (fcode)
    {
    case AARCH64_TME_BUILTIN_TSTART:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
      break;

    case AARCH64_TME_BUILTIN_TTEST:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
      break;

    case AARCH64_TME_BUILTIN_TCOMMIT:
      emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
      break;

    case AARCH64_TME_BUILTIN_TCANCEL:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	rtx op0 = expand_normal (arg0);
	if (CONST_INT_P (op0) && UINTVAL (op0) <= 65536)
	  emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
	else
	  {
	    error_at (EXPR_LOCATION (exp),
		      "argument must be a 16-bit constant immediate");
	    return const0_rtx;
	  }
      }
      break;

    default:
      gcc_unreachable ();
    }

  return target;
}
/* Function to expand an expression EXP which calls one of the Load/Store
   64 Byte extension (LS64) builtins FCODE with the result going to TARGET.  */
static rtx
aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
{
  expand_operand ops[3];

  switch (fcode)
    {
    case AARCH64_LS64_BUILTIN_LD64B:
      {
	rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
	create_output_operand (&ops[0], target, V8DImode);
	create_input_operand (&ops[1], op0, DImode);
	expand_insn (CODE_FOR_ld64b, 2, ops);
	return ops[0].value;
      }
    case AARCH64_LS64_BUILTIN_ST64B:
      {
	rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
	rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
	create_input_operand (&ops[0], op0, DImode);
	create_input_operand (&ops[1], op1, V8DImode);
	expand_insn (CODE_FOR_st64b, 2, ops);
	return const0_rtx;
      }
    case AARCH64_LS64_BUILTIN_ST64BV:
      {
	rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
	rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
	create_output_operand (&ops[0], target, DImode);
	create_input_operand (&ops[1], op0, DImode);
	create_input_operand (&ops[2], op1, V8DImode);
	expand_insn (CODE_FOR_st64bv, 3, ops);
	return ops[0].value;
      }
    case AARCH64_LS64_BUILTIN_ST64BV0:
      {
	rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
	rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
	create_output_operand (&ops[0], target, DImode);
	create_input_operand (&ops[1], op0, DImode);
	create_input_operand (&ops[2], op1, V8DImode);
	expand_insn (CODE_FOR_st64bv0, 3, ops);
	return ops[0].value;
      }
    }

  gcc_unreachable ();
}
/* Expand a random number builtin EXP with code FCODE, putting the result
   in TARGET.  If IGNORE is true the return value is ignored.  */
static rtx
aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
{
  rtx pat;
  enum insn_code icode;
  if (fcode == AARCH64_BUILTIN_RNG_RNDR)
    icode = CODE_FOR_aarch64_rndr;
  else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
    icode = CODE_FOR_aarch64_rndrrs;
  else
    gcc_unreachable ();

  rtx rand = gen_reg_rtx (DImode);
  pat = GEN_FCN (icode) (rand);
  if (!pat)
    return NULL_RTX;

  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx res_addr = expand_normal (arg0);
  res_addr = convert_memory_address (Pmode, res_addr);
  rtx res_mem = gen_rtx_MEM (DImode, res_addr);
  emit_insn (pat);
  emit_move_insn (res_mem, rand);
  /* If the status result is unused don't generate the CSET code.  */
  if (ignore)
    return target;

  rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
  rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
  return target;
}
/* Expand the read/write system register builtin EXPs.  */
aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
  rtx const_str, input_val, subreg;
  enum machine_mode mode;
  enum insn_code icode;
  class expand_operand ops[2];

  arg0 = CALL_EXPR_ARG (exp, 0);

  bool write_op = (fcode == AARCH64_WSR
                   || fcode == AARCH64_WSRP
                   || fcode == AARCH64_WSR64
                   || fcode == AARCH64_WSRF
                   || fcode == AARCH64_WSRF64
                   || fcode == AARCH64_WSR128);

  bool op128 = (fcode == AARCH64_RSR128 || fcode == AARCH64_WSR128);
  enum machine_mode sysreg_mode = op128 ? TImode : DImode;

  if (op128 && !TARGET_D128)
    error_at (EXPR_LOCATION (exp), "128-bit system register support requires"
              " the %<d128%> extension");

  /* Argument 0 (system register name) must be a string literal.  */
  gcc_assert (TREE_CODE (arg0) == ADDR_EXPR
              && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
              && TREE_CODE (TREE_OPERAND (arg0, 0)) == STRING_CST);

  const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));

  tree len_tree = c_strlen (arg0, 1);
  if (len_tree == NULL_TREE)
    error_at (EXPR_LOCATION (exp), "invalid system register name provided");

  size_t len = TREE_INT_CST_LOW (len_tree);
  char *sysreg_name = xstrdup (name_input);

  for (unsigned pos = 0; pos <= len; pos++)
    sysreg_name[pos] = TOLOWER (sysreg_name[pos]);

  const char* name_output = aarch64_retrieve_sysreg ((const char *) sysreg_name,
  if (name_output == NULL)
    error_at (EXPR_LOCATION (exp), "invalid system register name %qs",

  /* Assign the string corresponding to the system register name to an RTX.  */
  const_str = rtx_alloc (CONST_STRING);
  PUT_CODE (const_str, CONST_STRING);
  XSTR (const_str, 0) = ggc_strdup (name_output);

  /* Set up expander operands and call instruction expansion.  */
      arg1 = CALL_EXPR_ARG (exp, 1);
      mode = TYPE_MODE (TREE_TYPE (arg1));
      input_val = copy_to_mode_reg (mode, expand_normal (arg1));

      icode = (op128 ? CODE_FOR_aarch64_write_sysregti
               : CODE_FOR_aarch64_write_sysregdi);

        case AARCH64_WSRF64:
        case AARCH64_WSR128:
          subreg = force_lowpart_subreg (sysreg_mode, input_val, mode);
          subreg = gen_lowpart_SUBREG (SImode, input_val);
          subreg = gen_lowpart_SUBREG (DImode, subreg);

      create_fixed_operand (&ops[0], const_str);
      create_input_operand (&ops[1], subreg, sysreg_mode);
      expand_insn (icode, 2, ops);

  /* Read operations are implied by !write_op.  */
  gcc_assert (call_expr_nargs (exp) == 1);

  icode = (op128 ? CODE_FOR_aarch64_read_sysregti
           : CODE_FOR_aarch64_read_sysregdi);

  /* Emit the initial read_sysregdi rtx.  */
  create_output_operand (&ops[0], target, sysreg_mode);
  create_fixed_operand (&ops[1], const_str);
  expand_insn (icode, 2, ops);
  target = ops[0].value;

  /* Do any necessary post-processing on the result.  */
    case AARCH64_RSRF64:
    case AARCH64_RSR128:
      return force_lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)),
                                   target, sysreg_mode);
      subreg = gen_lowpart_SUBREG (SImode, target);
      return gen_lowpart_SUBREG (SFmode, subreg);
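/* Illustrative only (the ACLE wrapper names are assumptions, not defined in
   this file): a call such as

     uint64_t tpidr = __arm_rsr64 ("tpidr_el0");
     __arm_wsr64 ("tpidr_el0", tpidr + 16);

   reaches this expander with the register name as a string literal.  The
   name is lower-cased, validated, and wrapped in a CONST_STRING so the
   read_sysregdi/write_sysregdi patterns can print it directly into the
   MRS/MSR instruction.  */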
/* Ensure argument ARGNO in EXP represents a const-type argument in the range
   [MINVAL, MAXVAL).  */
static HOST_WIDE_INT
require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval,
                        HOST_WIDE_INT maxval)
  tree arg = CALL_EXPR_ARG (exp, argno);
  if (TREE_CODE (arg) != INTEGER_CST)
    error_at (EXPR_LOCATION (exp), "Constant-type argument expected");

  auto argval = wi::to_widest (arg);

  if (argval < minval || argval > maxval)
    error_at (EXPR_LOCATION (exp),
              "argument %d must be a constant immediate "
              "in range [%wd,%wd]", argno + 1, minval, maxval);

  HOST_WIDE_INT retval = argval.to_shwi ();
/* Expand a prefetch builtin EXP.  */
aarch64_expand_prefetch_builtin (tree exp, int fcode)
  class expand_operand ops[2];

  static const char *kind_s[] = {"PLD", "PST", "PLI"};
  static const char *level_s[] = {"L1", "L2", "L3", "SLC"};
  static const char *rettn_s[] = {"KEEP", "STRM"};

  /* Each of the four prefetch builtins takes a different number of arguments,
     but proceeds to call the PRFM insn which requires 4 pieces of information
     to be fully defined.  Where one of these takes less than 4 arguments, set
     sensible defaults.  */
  kind_id = (fcode == AARCH64_PLD) ? 0 : 2;

  /* Any -1 id variable is to be user-supplied.  Here we fill these in and run
     bounds checks on them.  "PLI" is used only implicitly by AARCH64_PLI &
     AARCH64_PLIX, never explicitly.  */
  kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s) - 1);
  level_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (level_s));
  rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s));
  rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode,

  sprintf (prfop, "%s%s%s", kind_s[kind_id],

  rtx const_str = rtx_alloc (CONST_STRING);
  PUT_CODE (const_str, CONST_STRING);
  XSTR (const_str, 0) = ggc_strdup (prfop);

  create_fixed_operand (&ops[0], const_str);
  create_address_operand (&ops[1], address);
  maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
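/* Illustrative sketch only (the intrinsic name and argument order follow my
   reading of ACLE and are an assumption, not defined in this file):

     __pldx (0, 2, 0, addr);   // access kind, cache level, retention policy

   would select kind_s[0] "PLD", level_s[2] "L3" and rettn_s[0] "KEEP", so the
   prfop string built above is "PLDL3KEEP" and the emitted instruction is

     prfm PLDL3KEEP, [x0]   */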
/* Expand an expression EXP that calls a MEMTAG built-in FCODE
   with result going to TARGET.  */
aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
    error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");

  enum insn_code icode = aarch64_memtag_builtin_data[fcode -
                           AARCH64_MEMTAG_BUILTIN_START - 1].icode;

  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
  machine_mode mode0 = GET_MODE (op0);
  op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
  op0 = convert_to_mode (DImode, op0, true);

    case AARCH64_MEMTAG_BUILTIN_IRG:
    case AARCH64_MEMTAG_BUILTIN_GMI:
    case AARCH64_MEMTAG_BUILTIN_SUBP:
    case AARCH64_MEMTAG_BUILTIN_INC_TAG:
          || GET_MODE (target) != DImode
          || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
        target = gen_reg_rtx (DImode);

      if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
          rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));

          if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
            pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
            error_at (EXPR_LOCATION (exp),
                      "argument %d must be a constant immediate "
                      "in range [0,15]", 2);

          rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
          machine_mode mode1 = GET_MODE (op1);
          op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
          op1 = convert_to_mode (DImode, op1, true);
          pat = GEN_FCN (icode) (target, op0, op1);

    case AARCH64_MEMTAG_BUILTIN_GET_TAG:
      pat = GEN_FCN (icode) (target, op0, const0_rtx);
    case AARCH64_MEMTAG_BUILTIN_SET_TAG:
      pat = GEN_FCN (icode) (op0, op0, const0_rtx);
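/* Usage sketch (the wrapper names come from arm_acle.h and are assumptions
   here):

     void *p2 = __arm_mte_increment_tag (p, 3);   // ADDG-style tag increment
     ptrdiff_t d = __arm_mte_ptrdiff (p, q);      // SUBP

   For AARCH64_MEMTAG_BUILTIN_INC_TAG the second argument must be a constant
   in [0,15], which is exactly what the diagnostic above enforces.  */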
/* Function to expand an expression EXP which calls one of the ACLE Data
   Intrinsic builtins FCODE with the result going to TARGET.  */
aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
  expand_operand ops[2];
  machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
  create_output_operand (&ops[0], target, mode);
  create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode);
  enum insn_code icode;

    case AARCH64_REV16L:
    case AARCH64_REV16LL:
      icode = code_for_aarch64_rev16 (mode);
    case AARCH64_RBITLL:
      icode = code_for_aarch64_rbit (mode);

  expand_insn (icode, 2, ops);
  return ops[0].value;
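/* For reference (values, not code from this file): a 32-bit REV16 of
   0x11223344 swaps the bytes within each halfword and yields 0x22114433,
   while RBIT of 0x1 reverses the bit order and yields 0x80000000.  */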
/* Convert ptr_mode value OP to a Pmode value (for ILP32).  */
aarch64_convert_address (expand_operand *op)
  op->value = convert_memory_address (Pmode, op->value);

/* Dereference the pointer in OP, turning it into a memory reference to
   NELTS instances of MEM_MODE.  */
aarch64_dereference_pointer (expand_operand *op, machine_mode mem_mode,
                             unsigned int nelts = 1)
      op->value = gen_rtx_MEM (mem_mode, op->value);
      op->mode = mem_mode;

      op->value = gen_rtx_MEM (BLKmode, op->value);
      set_mem_size (op->value, GET_MODE_SIZE (mem_mode) * nelts);
/* OP contains an integer index into a vector or tuple of mode VEC_MODE.
   Convert OP from an architectural lane number to a GCC lane number.  */
aarch64_canonicalize_lane (expand_operand *op, machine_mode vec_mode)
  auto nunits = aarch64_num_lanes (vec_mode);
  op->value = gen_int_mode (ENDIAN_LANE_N (nunits, UINTVAL (op->value)),

/* OP contains an integer index into a vector or tuple of mode VEC_MODE.
   Convert OP from an architectural lane number to a vec_merge mask.  */
aarch64_convert_to_lane_mask (expand_operand *op, machine_mode vec_mode)
  auto nunits = aarch64_num_lanes (vec_mode);
  create_integer_operand (op, 1 << ENDIAN_LANE_N (nunits, INTVAL (op->value)));
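/* Worked example: for a V4SI vector on big-endian, architectural lane 1 maps
   to GCC lane ENDIAN_LANE_N (4, 1) == 2 and the vec_merge mask is 1 << 2; on
   little-endian the lane stays at 1 and the mask is 1 << 1.  */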
/* If OP is a 128-bit vector, convert it to the equivalent 64-bit vector.
   Do nothing otherwise.  */
aarch64_convert_to_v64 (expand_operand *op)
  if (known_eq (GET_MODE_BITSIZE (op->mode), 128u))
      op->mode = aarch64_v64_mode (GET_MODE_INNER (op->mode)).require ();
      op->value = gen_lowpart (op->mode, op->value);
/* If OP is a 64-bit (half-register) vector or a structure of 64-bit vectors,
   pack its contents into the smallest associated full-register mode,
   padding with zeros if necessary.  Return true if padding was used.  */
aarch64_pack_into_v128s (expand_operand *op)
  bool padded = false;
  unsigned int nregs = targetm.hard_regno_nregs (V0_REGNUM, op->mode);

  /* Do nothing if the operand is already a full-register mode.  */
  if (known_eq (nregs * UNITS_PER_VREG, GET_MODE_SIZE (op->mode)))

  auto elt_mode = GET_MODE_INNER (op->mode);
  auto v64_mode = aarch64_v64_mode (elt_mode).require ();
  auto v128_mode = aarch64_v128_mode (elt_mode).require ();

  auto new_mode = v128_mode;
    new_mode = aarch64_advsimd_vector_array_mode (v128_mode, CEIL (nregs, 2))

  /* Get enough V64_MODE inputs to fill NEW_MODE, which is made up of a
     whole number of V128_MODEs.  */
  auto_vec<rtx, 4> inputs;
  for (unsigned int i = 0; i < nregs; ++i)
      rtx input = simplify_gen_subreg (v64_mode, op->value, op->mode,
                                       i * GET_MODE_SIZE (v64_mode));
      inputs.quick_push (input);
    inputs.quick_push (CONST0_RTX (v64_mode));

  /* Create a NEW_MODE register and build it up from individual V128_MODEs.  */
  op->mode = new_mode;
  op->value = gen_reg_rtx (new_mode);
  for (unsigned int i = 0; i < inputs.length (); i += 2)
      rtx result = gen_rtx_SUBREG (v128_mode, op->value,
                                   i * GET_MODE_SIZE (v64_mode));
      emit_insn (gen_aarch64_combine (v64_mode, result,
                                      inputs[i], inputs[i + 1]));
/* UNSPEC is a high unspec, indicated by "2" in mnemonics and "_high" in
   intrinsic names.  Return the equivalent low unspec.  */
aarch64_get_low_unspec (int unspec)
    case UNSPEC_FCVTN2_FP8:
      return UNSPEC_FCVTN_FP8;
    case UNSPEC_F1CVTL2_FP8:
      return UNSPEC_F1CVTL_FP8;
    case UNSPEC_F2CVTL2_FP8:
      return UNSPEC_F2CVTL_FP8;
/* OPS contains the operands for one of the permute pair functions vtrn,
   vuzp or vzip.  Expand the call, given that PERMUTE1 is the unspec for
   the first permute and PERMUTE2 is the unspec for the second permute.  */
aarch64_expand_permute_pair (vec<expand_operand> &ops, int permute1,
  rtx op0 = force_reg (ops[1].mode, ops[1].value);
  rtx op1 = force_reg (ops[2].mode, ops[2].value);
  rtx target = gen_reg_rtx (ops[0].mode);
  rtx target0 = gen_rtx_SUBREG (ops[1].mode, target, 0);
  rtx target1 = gen_rtx_SUBREG (ops[1].mode, target,
                                GET_MODE_SIZE (ops[1].mode));
  emit_insn (gen_aarch64 (permute1, ops[1].mode, target0, op0, op1));
  emit_insn (gen_aarch64 (permute2, ops[1].mode, target1, op0, op1));
/* Emit a TBL or TBX instruction with inputs INPUTS and a result of mode
   MODE.  Return the result of the instruction.

   UNSPEC is either UNSPEC_TBL or UNSPEC_TBX.  The inputs must already be in
   registers.  */
aarch64_expand_tbl_tbx (vec<rtx> &inputs, int unspec, machine_mode mode)
  rtx result = gen_reg_rtx (mode);
  rtvec vec = gen_rtvec_v (inputs.length (), inputs.address ());
  emit_insn (gen_rtx_SET (result, gen_rtx_UNSPEC (mode, vec, unspec)));
/* Emit a TBL or TBX intrinsic with the operands given by OPS.  Return the
   result of the intrinsic.

   UNSPEC is either UNSPEC_TBL or UNSPEC_TBX.  */
aarch64_expand_tbl_tbx (vec<expand_operand> &ops, int unspec)
  for (unsigned int i = 1; i < ops.length (); ++i)
    ops[i].value = force_reg (ops[i].mode, ops[i].value);

  /* Handle the legacy forms for which the table is composed of 64-bit
     rather than 128-bit vectors.  */
  auto &table = ops[ops.length () - 2];
  auto table_nelts = GET_MODE_NUNITS (table.mode);
  bool padded = aarch64_pack_into_v128s (&table);

  /* Packing to 128-bit vectors is enough for everything except the 64-bit
     forms of vtbx1 and vtbx3, where we need to handle the zero padding.  */
  if (unspec == UNSPEC_TBL || !padded)
      auto_vec<rtx, 3> inputs;
      for (unsigned int i = 1; i < ops.length (); ++i)
        inputs.quick_push (ops[i].value);
      return aarch64_expand_tbl_tbx (inputs, unspec, ops[0].mode);

  /* Generate a TBL, which will give the right results for indices that
     are less than TABLE_NELTS.  */
  auto_vec<rtx, 2> inputs;
  for (unsigned int i = 2; i < ops.length (); ++i)
    inputs.quick_push (ops[i].value);
  rtx tbl_result = aarch64_expand_tbl_tbx (inputs, UNSPEC_TBL, ops[0].mode);

  /* Get a mask of the indices that are less than TABLE_NELTS.  */
  auto &indices = ops.last ();
  rtx cmp_result = gen_reg_rtx (indices.mode);
  rtx bound_rtx = gen_int_mode (table_nelts, GET_MODE_INNER (indices.mode));
  rtx bound_vec_rtx = gen_const_vec_duplicate (indices.mode, bound_rtx);
  emit_insn (gen_aarch64_cm (GTU, indices.mode, cmp_result,
                             force_reg (indices.mode, bound_vec_rtx),

  /* Select from the TBL result if the index is less than TABLE_NELTS
     and from OPS[1] otherwise.  */
  rtx result = gen_reg_rtx (ops[0].mode);
  auto icode = get_vcond_mask_icode (ops[0].mode, indices.mode);
  emit_insn (GEN_FCN (icode) (result, tbl_result, ops[1].value, cmp_result));
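/* Illustrative case: for vtbx1_s8 (r, tab, idx) the 8-byte table is padded to
   16 bytes, so a plain TBX would return 0 rather than R for indices in
   [8,16).  The code above therefore emits TBL plus a compare/select: lanes
   whose index is below the original table size take the TBL result, the
   remaining lanes keep R.  */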
/* Expand CALL_EXPR EXP, given that it is a call to the function described
   by BUILTIN_DATA, and return the function's return value.  Put the result
   in TARGET if convenient.  */
aarch64_expand_pragma_builtin (tree exp, rtx target,
                               const aarch64_pragma_builtins_data &builtin_data)
  unsigned int nargs = call_expr_nargs (exp);
  bool returns_void = VOID_TYPE_P (TREE_TYPE (exp));

  auto_vec<expand_operand, 8> ops;
  create_output_operand (ops.safe_push ({}), target,
                         TYPE_MODE (TREE_TYPE (exp)));
  for (unsigned int i = 0; i < nargs; ++i)
      tree arg = CALL_EXPR_ARG (exp, i);
      create_input_operand (ops.safe_push ({}), expand_normal (arg),
                            TYPE_MODE (TREE_TYPE (arg)));
      if (POINTER_TYPE_P (TREE_TYPE (arg)))
        aarch64_convert_address (&ops.last ());

  if (builtin_data.flags & FLAG_USES_FPMR)
      auto fpm_input = ops.pop ().value;
      auto fpmr = gen_rtx_REG (DImode, FPM_REGNUM);
      emit_move_insn (fpmr, fpm_input);

  switch (builtin_data.unspec)
    case UNSPEC_F1CVTL_FP8:
    case UNSPEC_F2CVTL_FP8:
      /* Convert _low forms (which take 128-bit vectors) to the base
      aarch64_convert_to_v64 (&ops[1]);

      /* LUTI2 treats the first argument as a vector of 4 elements.  The forms
         with 128-bit inputs are only provided as a convenience; the upper
         halves don't actually matter.  */
      aarch64_convert_to_v64 (&ops[1]);

  switch (builtin_data.unspec)
      icode = code_for_aarch64_simd_bsl (ops[0].mode);

    case UNSPEC_COMBINE:
      icode = code_for_aarch64_combine (ops[1].mode);

      if (builtin_data.signature == aarch64_builtin_signatures::load)
        aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode));
      return expand_vector_broadcast (ops[0].mode, ops[1].value);

    case UNSPEC_DUP_LANE:
      aarch64_canonicalize_lane (&ops[2], ops[1].mode);
      if (ops[0].mode == ops[1].mode)
        icode = code_for_aarch64_dup_lane (ops[0].mode);
        icode = code_for_aarch64_dup_lane (ops[0].mode, ops[0].mode);

      icode = code_for_aarch64_ext (ops[0].mode);

    case UNSPEC_F1CVTL_FP8:
    case UNSPEC_F2CVTL_FP8:
    case UNSPEC_FDOT_FP8:
      icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);

    case UNSPEC_F1CVTL2_FP8:
    case UNSPEC_F2CVTL2_FP8:
        /* Add a high-part selector for the vec_merge.  */
        auto src_mode = ops.last ().mode;
        auto nunits = GET_MODE_NUNITS (src_mode).to_constant ();
        rtx par = aarch64_simd_vect_par_cnst_half (src_mode, nunits, true);
        create_fixed_operand (ops.safe_push ({}), par);

        auto unspec = aarch64_get_low_unspec (builtin_data.unspec);
        icode = code_for_aarch64_high (unspec, ops[0].mode);

    case UNSPEC_FCVTN_FP8:
      icode = code_for_aarch64 (builtin_data.unspec, ops[1].mode);

    case UNSPEC_FCVTN2_FP8:
        auto unspec = aarch64_get_low_unspec (builtin_data.unspec);
        auto mode = ops.last ().mode;
        if (BYTES_BIG_ENDIAN)
          icode = code_for_aarch64_high_be (unspec, mode);
          icode = code_for_aarch64_high_le (unspec, mode);

    case UNSPEC_FDOT_LANE_FP8:
      /* This pattern does not canonicalize the lane number.  */
      icode = code_for_aarch64_lane (builtin_data.unspec,
                                     ops[0].mode, ops[3].mode);

    case UNSPEC_FMLALB_FP8:
    case UNSPEC_FMLALT_FP8:
    case UNSPEC_FMLALLBB_FP8:
    case UNSPEC_FMLALLBT_FP8:
    case UNSPEC_FMLALLTB_FP8:
    case UNSPEC_FMLALLTT_FP8:
      if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
          aarch64_canonicalize_lane (&ops[4], ops[3].mode);
          icode = code_for_aarch64_lane (builtin_data.unspec,
                                         ops[0].mode, ops[3].mode);
      else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
        icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);

    case UNSPEC_GET_LANE:
      aarch64_canonicalize_lane (&ops[2], ops[1].mode);
      icode = code_for_aarch64_get_lane (ops[1].mode);

      icode = code_for_aarch64_ld1 (ops[0].mode);
      icode = code_for_aarch64_ld1x2 (ops[0].mode);
      icode = code_for_aarch64_ld1x3 (ops[0].mode);
      icode = code_for_aarch64_ld1x4 (ops[0].mode);
      icode = code_for_aarch64_ld (ops[0].mode, ops[0].mode);

    case UNSPEC_LD2_DUP:
      aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 2);
      icode = code_for_aarch64_simd_ld2r (ops[0].mode);
    case UNSPEC_LD3_DUP:
      aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 3);
      icode = code_for_aarch64_simd_ld3r (ops[0].mode);
    case UNSPEC_LD4_DUP:
      aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 4);
      icode = code_for_aarch64_simd_ld4r (ops[0].mode);

    case UNSPEC_LD2_LANE:
    case UNSPEC_LD3_LANE:
    case UNSPEC_LD4_LANE:
      aarch64_canonicalize_lane (&ops[3], ops[2].mode);
      icode = code_for_aarch64_ld_lane (ops[0].mode, ops[0].mode);

      create_integer_operand (ops.safe_push ({}),
                              builtin_data.unspec == UNSPEC_LUTI2 ? 2 : 4);
      icode = code_for_aarch64_lut (ops[1].mode, ops[2].mode);

      icode = code_for_aarch64_rev (builtin_data.unspec, ops[0].mode);

    case UNSPEC_SET_LANE:
      if (builtin_data.signature == aarch64_builtin_signatures::load_lane)
        aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode));
      /* The vec_set operand order is: dest, scalar, mask, vector.  */
      std::swap (ops[2], ops[3]);
      aarch64_convert_to_lane_mask (&ops[2], ops[3].mode);
      icode = code_for_aarch64_simd_vec_set (ops[0].mode);

      icode = code_for_aarch64_st1 (ops[1].mode);

    case UNSPEC_ST1_LANE:
      aarch64_dereference_pointer (&ops[0], GET_MODE_INNER (ops[1].mode));
      /* Reinterpret ops[0] as an output.  */
      create_fixed_operand (&ops[0], ops[0].value);
      aarch64_canonicalize_lane (&ops[2], ops[1].mode);
      icode = code_for_aarch64_get_lane (ops[1].mode);

      icode = code_for_aarch64_st1x2 (ops[1].mode);
      icode = code_for_aarch64_st1x3 (ops[1].mode);
      icode = code_for_aarch64_st1x4 (ops[1].mode);
      icode = code_for_aarch64_st (ops[1].mode, ops[1].mode);

    case UNSPEC_ST2_LANE:
    case UNSPEC_ST3_LANE:
    case UNSPEC_ST4_LANE:
      aarch64_canonicalize_lane (&ops[2], ops[1].mode);
      icode = code_for_aarch64_st_lane (ops[1].mode, ops[1].mode);

      return aarch64_expand_tbl_tbx (ops, builtin_data.unspec);
      return aarch64_expand_permute_pair (ops, UNSPEC_TRN1, UNSPEC_TRN2);
      return aarch64_expand_permute_pair (ops, UNSPEC_UZP1, UNSPEC_UZP2);

    case UNSPEC_VCREATE:
      return force_lowpart_subreg (ops[0].mode, ops[1].value, ops[1].mode);

    case UNSPEC_VEC_COPY:
      aarch64_convert_to_lane_mask (&ops[2], ops[1].mode);
      aarch64_canonicalize_lane (&ops[4], ops[3].mode);
      if (ops[1].mode == ops[3].mode)
        icode = code_for_aarch64_simd_vec_copy_lane (ops[1].mode);
        icode = code_for_aarch64_simd_vec_copy_lane (ops[1].mode,

      return aarch64_expand_permute_pair (ops, UNSPEC_ZIP1, UNSPEC_ZIP2);

  expand_insn (icode, ops.length (), ops.address ());
  return ops[0].value;
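/* As a concrete shape (illustrative, assuming vsetq_lane is routed through
   UNSPEC_SET_LANE): vsetq_lane_s32 (x, v, 1) arrives as operands
   (dest, scalar, vector, lane); the lane and vector are swapped to match the
   vec_set order (dest, scalar, mask, vector), the lane becomes the vec_merge
   mask 1 << ENDIAN_LANE_N (4, 1), and aarch64_simd_vec_set is expanded into
   an INS v0.s[1], w0 style instruction.  */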
/* Expand an expression EXP as fpsr or fpcr setter (depending on
   UNSPEC) using MODE.  */
aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
  tree arg = CALL_EXPR_ARG (exp, 0);
  rtx op = force_reg (mode, expand_normal (arg));
  emit_insn (gen_aarch64_set (unspec, mode, op));

/* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE.
   Return the target.  */
aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
  create_output_operand (&op, target, mode);
  expand_insn (icode, 1, &op);
/* Expand GCS builtin EXP with code FCODE, putting the result
   into TARGET.  If IGNORE is true the return value is ignored.  */
aarch64_expand_gcs_builtin (tree exp, rtx target, int fcode, int ignore)
  if (fcode == AARCH64_BUILTIN_GCSPR)
      create_output_operand (&op, target, DImode);
      expand_insn (CODE_FOR_aarch64_load_gcspr, 1, &op);

  if (fcode == AARCH64_BUILTIN_GCSPOPM && ignore)
      expand_insn (CODE_FOR_aarch64_gcspopm_xzr, 0, 0);

  if (fcode == AARCH64_BUILTIN_GCSPOPM)
      expand_operand ops[2];
      create_output_operand (&ops[0], target, DImode);
      create_input_operand (&ops[1], const0_rtx, DImode);
      expand_insn (CODE_FOR_aarch64_gcspopm, 2, ops);
      return gen_lowpart (ptr_mode, ops[0].value);

  if (fcode == AARCH64_BUILTIN_GCSSS)
      expand_operand opnd;
      rtx arg = expand_normal (CALL_EXPR_ARG (exp, 0));
      arg = convert_modes (DImode, ptr_mode, arg, true);
      create_input_operand (&opnd, arg, DImode);
      expand_insn (CODE_FOR_aarch64_gcsss1, 1, &opnd);
      expand_operand ops[2];
      create_output_operand (&ops[0], target, DImode);
      create_input_operand (&ops[1], const0_rtx, DImode);
      expand_insn (CODE_FOR_aarch64_gcsss2, 2, ops);
      return gen_lowpart (ptr_mode, ops[0].value);
/* Expand an expression EXP that calls built-in function FCODE,
   with result going to TARGET if that's convenient.  IGNORE is true
   if the result of the builtin is ignored.  */
aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
    case AARCH64_BUILTIN_GET_FPCR:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
    case AARCH64_BUILTIN_SET_FPCR:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
    case AARCH64_BUILTIN_GET_FPSR:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
    case AARCH64_BUILTIN_SET_FPSR:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
    case AARCH64_BUILTIN_GET_FPCR64:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
    case AARCH64_BUILTIN_SET_FPCR64:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
    case AARCH64_BUILTIN_GET_FPSR64:
      return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
    case AARCH64_BUILTIN_SET_FPSR64:
      aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);

    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
    case AARCH64_PAUTH_BUILTIN_PACIB1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
          rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
          icode = CODE_FOR_xpaclri;
          emit_move_insn (lr, op0);
          emit_insn (GEN_FCN (icode) ());

          tree arg1 = CALL_EXPR_ARG (exp, 1);
          rtx op1 = force_reg (Pmode, expand_normal (arg1));
            case AARCH64_PAUTH_BUILTIN_AUTIA1716:
              icode = CODE_FOR_autia1716;
            case AARCH64_PAUTH_BUILTIN_AUTIB1716:
              icode = CODE_FOR_autib1716;
            case AARCH64_PAUTH_BUILTIN_PACIA1716:
              icode = CODE_FOR_pacia1716;
            case AARCH64_PAUTH_BUILTIN_PACIB1716:
              icode = CODE_FOR_pacib1716;

          rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
          rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
          emit_move_insn (x17_reg, op0);
          emit_move_insn (x16_reg, op1);
          emit_insn (GEN_FCN (icode) ());

      expand_operand ops[2];
      create_output_operand (&ops[0], target, SImode);
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      create_input_operand (&ops[1], op0, DFmode);
      expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
      return ops[0].value;

    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
      return aarch64_expand_fcmla_builtin (exp, target, fcode);
    case AARCH64_BUILTIN_RNG_RNDR:
    case AARCH64_BUILTIN_RNG_RNDRRS:
      return aarch64_expand_rng_builtin (exp, target, fcode, ignore);

    case AARCH64_RSRF64:
    case AARCH64_RSR128:
    case AARCH64_WSRF64:
    case AARCH64_WSR128:
      return aarch64_expand_rwsr_builtin (exp, target, fcode);

      aarch64_expand_prefetch_builtin (exp, fcode);

    case AARCH64_BUILTIN_CHKFEAT:
        rtx x16_reg = gen_rtx_REG (DImode, R16_REGNUM);
        op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
        emit_move_insn (x16_reg, op0);
        expand_insn (CODE_FOR_aarch64_chkfeat, 0, 0);
        return copy_to_reg (x16_reg);

    case AARCH64_BUILTIN_GCSPR:
    case AARCH64_BUILTIN_GCSPOPM:
    case AARCH64_BUILTIN_GCSSS:
      return aarch64_expand_gcs_builtin (exp, target, fcode, ignore);

  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE
           && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  if (fcode == AARCH64_TME_BUILTIN_TSTART
      || fcode == AARCH64_TME_BUILTIN_TCOMMIT
      || fcode == AARCH64_TME_BUILTIN_TTEST
      || fcode == AARCH64_TME_BUILTIN_TCANCEL)
    return aarch64_expand_builtin_tme (fcode, exp, target);

  if (fcode == AARCH64_LS64_BUILTIN_LD64B
      || fcode == AARCH64_LS64_BUILTIN_ST64B
      || fcode == AARCH64_LS64_BUILTIN_ST64BV
      || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
    return aarch64_expand_builtin_ls64 (fcode, exp, target);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_expand_builtin_memtag (fcode, exp, target);
  if (fcode >= AARCH64_REV16
      && fcode <= AARCH64_RBITLL)
    return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);

  if (auto builtin_data = aarch64_get_pragma_builtin (fcode))
    return aarch64_expand_pragma_builtin (exp, target, *builtin_data);
/* Return builtin for reciprocal square root.  */
aarch64_general_builtin_rsqrt (unsigned int fn)
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
/* Return true if the lane check can be removed as there is no
   error going to be emitted.  */
aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
  if (TREE_CODE (arg0) != INTEGER_CST)
  if (TREE_CODE (arg1) != INTEGER_CST)
  if (TREE_CODE (arg2) != INTEGER_CST)

  auto totalsize = wi::to_widest (arg0);
  auto elementsize = wi::to_widest (arg1);
  if (totalsize == 0 || elementsize == 0)
  auto lane = wi::to_widest (arg2);
  auto high = wi::udiv_trunc (totalsize, elementsize);
  return wi::ltu_p (lane, high);
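/* Worked example: if ARG0 divided by ARG1 is 4 (a four-lane vector), HIGH is
   4, so any constant lane in [0,3] lets the lane-check call be removed; a
   lane of 4 or a non-constant lane keeps the check (and its diagnostic).  */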
#define VAR1(T, N, MAP, FLAG, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

#undef VREINTERPRET_BUILTIN
#define VREINTERPRET_BUILTIN(A, B, L) \
  case AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B:

#undef VGET_LOW_BUILTIN
#define VGET_LOW_BUILTIN(A) \
  case AARCH64_SIMD_BUILTIN_VGET_LOW_##A:

#undef VGET_HIGH_BUILTIN
#define VGET_HIGH_BUILTIN(A) \
  case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A:
/* Try to fold a call to the built-in function with subcode FCODE.  The
   function is passed the N_ARGS arguments in ARGS and it returns a value
   of type TYPE.  Return the new expression on success and NULL_TREE on
   failure.  */
aarch64_general_fold_builtin (unsigned int fcode, tree type,
                              unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
      BUILTIN_VDQF (UNOP, abs, 2, ALL)
        return fold_build1 (ABS_EXPR, type, args[0]);
      VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
      VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
      VAR1 (UNOP, floatv2di, 2, ALL, v2df)
        return fold_build1 (FLOAT_EXPR, type, args[0]);
      AARCH64_SIMD_VREINTERPRET_BUILTINS
        return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
      AARCH64_SIMD_VGET_LOW_BUILTINS
          auto pos = BYTES_BIG_ENDIAN ? 64 : 0;
          return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
      AARCH64_SIMD_VGET_HIGH_BUILTINS
          auto pos = BYTES_BIG_ENDIAN ? 0 : 64;
          return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
      case AARCH64_SIMD_BUILTIN_LANE_CHECK:
        gcc_assert (n_args == 3);
        if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
enum aarch64_simd_type
get_mem_type_for_load_store (unsigned int fcode)
    VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
    VAR1 (STORE1, st1, 0, STORE, v8qi)
    VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
    VAR1 (STORE1, st1, 0, STORE, v16qi)
    VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
    VAR1 (STORE1, st1, 0, STORE, v4hi)
    VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
    VAR1 (STORE1, st1, 0, STORE, v8hi)
    VAR1 (LOAD1, ld1, 0, LOAD, v2si)
    VAR1 (STORE1, st1, 0, STORE, v2si)
    VAR1 (LOAD1, ld1, 0, LOAD, v4si)
    VAR1 (STORE1, st1, 0, STORE, v4si)
    VAR1 (LOAD1, ld1, 0, LOAD, v2di)
    VAR1 (STORE1, st1, 0, STORE, v2di)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
    VAR1 (STORE1_U, st1, 0, STORE, v8qi)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
    VAR1 (STORE1_U, st1, 0, STORE, v16qi)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
    VAR1 (STORE1_U, st1, 0, STORE, v4hi)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
    VAR1 (STORE1_U, st1, 0, STORE, v8hi)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
    VAR1 (STORE1_U, st1, 0, STORE, v2si)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
    VAR1 (STORE1_U, st1, 0, STORE, v4si)
    VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
    VAR1 (STORE1_U, st1, 0, STORE, v2di)
    VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
    VAR1 (STORE1_P, st1, 0, STORE, v8qi)
    VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
    VAR1 (STORE1_P, st1, 0, STORE, v16qi)
    VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
    VAR1 (STORE1_P, st1, 0, STORE, v4hi)
    VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
    VAR1 (STORE1_P, st1, 0, STORE, v8hi)
    VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
    VAR1 (STORE1_P, st1, 0, STORE, v2di)
    VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
    VAR1 (STORE1, st1, 0, STORE, v4hf)
    VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
    VAR1 (STORE1, st1, 0, STORE, v8hf)
    VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
    VAR1 (STORE1, st1, 0, STORE, v4bf)
      return Bfloat16x4_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
    VAR1 (STORE1, st1, 0, STORE, v8bf)
      return Bfloat16x8_t;
    VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
    VAR1 (STORE1, st1, 0, STORE, v2sf)
    VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
    VAR1 (STORE1, st1, 0, STORE, v4sf)
    VAR1 (LOAD1, ld1, 0, LOAD, v2df)
    VAR1 (STORE1, st1, 0, STORE, v2df)
/* We've seen a vector load from address ADDR.  Record it in
   vector_load_decls, if appropriate.  */
aarch64_record_vector_load_arg (tree addr)
  tree decl = aarch64_vector_load_decl (addr);
  if (!cfun->machine->vector_load_decls)
    cfun->machine->vector_load_decls = hash_set<tree>::create_ggc (31);
  cfun->machine->vector_load_decls->add (decl);
/* Force VAL into a valid gimple value, creating a new SSA_NAME if
   necessary.  Insert any new statements before GSI.  */
aarch64_force_gimple_val (gimple_stmt_iterator *gsi, tree val)
  if (is_gimple_val (val))

  tree tmp = make_ssa_name (TREE_TYPE (val));
  gsi_insert_before_without_update (gsi, gimple_build_assign (tmp, val),

/* Copy vops from FROM to TO and return TO.  */
aarch64_copy_vops (gimple *to, gimple *from)
  gimple_set_vuse (to, gimple_vuse (from));
  gimple_set_vdef (to, gimple_vdef (from));

/* Fold STMT (at GSI) to VAL, with SEQ setting up the value of VAL.
   Return the replacement statement.  */
aarch64_fold_to_val (gcall *stmt, gimple_stmt_iterator *gsi,
                     gimple *seq, tree val)
  auto *assign = gimple_build_assign (gimple_call_lhs (stmt), val);
  gimple_seq_add_stmt_without_update (&seq, assign);
  gsi_replace_with_seq_vops (gsi, seq);
/* Dereference pointer ADDR, giving a memory reference of type TYPE.  */
aarch64_dereference (tree addr, tree type)
  tree elt_type = (VECTOR_TYPE_P (type) ? TREE_TYPE (type) : type);
  tree elt_ptr_type = build_pointer_type_for_mode (elt_type, VOIDmode, true);
  tree zero = build_zero_cst (elt_ptr_type);
  /* Use element type alignment.  */
  tree access_type = build_aligned_type (type, TYPE_ALIGN (elt_type));
  return fold_build2 (MEM_REF, access_type, addr, zero);
/* LANE is a lane index into VEC.  Return the associated bit index
   (counting from the first byte in memory order).  */
aarch64_get_lane_bit_index (tree vec, tree lane)
  auto vec_mode = TYPE_MODE (TREE_TYPE (vec));
  auto nunits = aarch64_num_lanes (vec_mode);
  auto idx = ENDIAN_LANE_N (nunits, tree_to_uhwi (lane));
  return bitsize_int (idx * GET_MODE_UNIT_BITSIZE (vec_mode));

/* LANE is a lane index into VEC.  Return a BIT_FIELD_REF for the
   selected element.  */
aarch64_get_lane (tree vec, tree lane)
  auto elt_type = TREE_TYPE (TREE_TYPE (vec));
  return fold_build3 (BIT_FIELD_REF, elt_type, vec, TYPE_SIZE (elt_type),
                      aarch64_get_lane_bit_index (vec, lane));

/* LANE is a lane index into VEC.  Return a BIT_INSERT_EXPR that replaces
   that index with ELT and stores the result in LHS.  */
aarch64_set_lane (tree lhs, tree elt, tree vec, tree lane)
  tree bit = aarch64_get_lane_bit_index (vec, lane);
  return gimple_build_assign (lhs, BIT_INSERT_EXPR, vec, elt, bit);
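/* For example, on little-endian a get of lane 2 from a four-lane 32-bit
   vector becomes BIT_FIELD_REF <v, 32, 64>, and the corresponding set of
   lane 2 becomes BIT_INSERT_EXPR at bit position 64; on big-endian the bit
   position is computed from the reversed lane number instead.  */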
/* Fold a call to vcombine.  */
aarch64_fold_combine (gcall *stmt)
  tree first_part, second_part;
  if (BYTES_BIG_ENDIAN)
      second_part = gimple_call_arg (stmt, 0);
      first_part = gimple_call_arg (stmt, 1);
      first_part = gimple_call_arg (stmt, 0);
      second_part = gimple_call_arg (stmt, 1);
  tree ret_type = gimple_call_return_type (stmt);
  tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part,
                                    NULL_TREE, second_part);
  return gimple_build_assign (gimple_call_lhs (stmt), ctor);
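/* E.g. vcombine_s32 (lo, hi) is folded into a CONSTRUCTOR {lo, hi} of the
   128-bit result type, with the two halves swapped on big-endian so that the
   memory-order halves come from the intended arguments.  */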
/* Fold a call to vld1, given that it loads something of type TYPE.  */
aarch64_fold_load (gcall *stmt, tree type)
  /* Punt until after inlining, so that we stand more chance of
     recording something meaningful in vector_load_decls.  */
  if (!cfun->after_inlining)
  tree addr = gimple_call_arg (stmt, 0);
  aarch64_record_vector_load_arg (addr);
  if (!BYTES_BIG_ENDIAN)
      tree mem = aarch64_dereference (addr, type);
      auto *new_stmt = gimple_build_assign (gimple_get_lhs (stmt), mem);
      return aarch64_copy_vops (new_stmt, stmt);

/* Fold a call to vst1, given that it stores something of type TYPE.  */
aarch64_fold_store (gcall *stmt, tree type)
  tree addr = gimple_call_arg (stmt, 0);
  tree data = gimple_call_arg (stmt, 1);
  if (!BYTES_BIG_ENDIAN)
      tree mem = aarch64_dereference (addr, type);
      auto *new_stmt = gimple_build_assign (mem, data);
      return aarch64_copy_vops (new_stmt, stmt);
/* An aarch64_fold_permute callback for vext.  SELECTOR is the value of
   the final argument.  */
aarch64_ext_index (unsigned int, unsigned int selector, unsigned int i)
  return selector + i;

/* An aarch64_fold_permute callback for vrev.  SELECTOR is the number
   of elements in each reversal group.  */
aarch64_rev_index (unsigned int, unsigned int selector, unsigned int i)
  return ROUND_DOWN (i, selector) + (selector - 1) - (i % selector);

/* An aarch64_fold_permute callback for vtrn.  SELECTOR is 0 for TRN1
   and 1 for TRN2.  */
aarch64_trn_index (unsigned int nelts, unsigned int selector, unsigned int i)
  return (i % 2) * nelts + ROUND_DOWN (i, 2) + selector;

/* An aarch64_fold_permute callback for vuzp.  SELECTOR is 0 for UZP1
   and 1 for UZP2.  */
aarch64_uzp_index (unsigned int, unsigned int selector, unsigned int i)
  return i * 2 + selector;

/* An aarch64_fold_permute callback for vzip.  SELECTOR is 0 for ZIP1
   and 1 for ZIP2.  */
aarch64_zip_index (unsigned int nelts, unsigned int selector, unsigned int i)
  return (i % 2) * nelts + (i / 2) + selector * (nelts / 2);
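/* Worked example for NELTS == 4: aarch64_zip_index with SELECTOR 0 selects
   input lanes {0, 4, 1, 5} and with SELECTOR 1 selects {2, 6, 3, 7};
   aarch64_uzp_index with SELECTOR 0 selects the even lanes {0, 2, 4, 6};
   aarch64_trn_index with SELECTOR 0 selects {0, 4, 2, 6}.  */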
/* Fold STMT to a VEC_PERM_EXPR on the first NINPUTS arguments.
   Make the VEC_PERM_EXPR emulate an NINPUTS-input TBL in which
   architectural lane I of the result selects architectural lane:

     GET_INDEX (NELTS, SELECTOR, I)

   of the input table.  NELTS is the number of elements in one vector.  */
aarch64_fold_permute (gcall *stmt, unsigned int ninputs,
                      unsigned int (*get_index) (unsigned int, unsigned int,
                      unsigned int selector)
  tree op0 = gimple_call_arg (stmt, 0);
  tree op1 = ninputs == 2 ? gimple_call_arg (stmt, 1) : op0;
  auto nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)).to_constant ();
  vec_perm_builder sel (nelts, nelts, 1);
  for (unsigned int i = 0; i < nelts; ++i)
      unsigned int index = get_index (nelts, selector,
                                      ENDIAN_LANE_N (nelts, i));
      unsigned int vec = index / nelts;
      unsigned int elt = ENDIAN_LANE_N (nelts, index % nelts);
      sel.quick_push (vec * nelts + elt);

  vec_perm_indices indices (sel, ninputs, nelts);
  tree mask_type = build_vector_type (ssizetype, nelts);
  tree mask = vec_perm_indices_to_tree (mask_type, indices);
  return gimple_build_assign (gimple_call_lhs (stmt), VEC_PERM_EXPR,
/* Try to fold STMT (at GSI), given that it is a call to the builtin
   described by BUILTIN_DATA.  Return the new statement on success,
   otherwise return null.  */
aarch64_gimple_fold_pragma_builtin
  (gcall *stmt, gimple_stmt_iterator *gsi,
   const aarch64_pragma_builtins_data &builtin_data)
  auto &types = builtin_data.types;

  switch (builtin_data.unspec)
    case UNSPEC_COMBINE:
      return aarch64_fold_combine (stmt);

    case UNSPEC_DUP_LANE:
        tree arg = gimple_call_arg (stmt, 0);
        tree type = types[0].type ();
        if (builtin_data.signature == aarch64_builtin_signatures::load)
          arg = aarch64_dereference (arg, TREE_TYPE (type));
        else if (builtin_data.unspec == UNSPEC_DUP_LANE)
          arg = aarch64_get_lane (arg, gimple_call_arg (stmt, 1));
        arg = aarch64_force_gimple_val (gsi, arg);

        tree dup = build_vector_from_val (type, arg);
        return aarch64_fold_to_val (stmt, gsi, nullptr, dup);

        auto index = tree_to_uhwi (gimple_call_arg (stmt, 2));
        return aarch64_fold_permute (stmt, 2, aarch64_ext_index, index);

    case UNSPEC_GET_LANE:
        tree val = aarch64_get_lane (gimple_call_arg (stmt, 0),
                                     gimple_call_arg (stmt, 1));
        return gimple_build_assign (gimple_call_lhs (stmt), val);

      return aarch64_fold_load (stmt, types[0].type ());

        auto selector = 16 / GET_MODE_UNIT_BITSIZE (types[0].mode);
        return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector);

        auto selector = 32 / GET_MODE_UNIT_BITSIZE (types[0].mode);
        return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector);

        auto selector = 64 / GET_MODE_UNIT_BITSIZE (types[0].mode);
        return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector);

    case UNSPEC_SET_LANE:
        tree elt = gimple_call_arg (stmt, 0);
        if (builtin_data.signature == aarch64_builtin_signatures::load_lane)
            elt = aarch64_dereference (elt, TREE_TYPE (types[0].type ()));
            elt = aarch64_force_gimple_val (gsi, elt);
        return aarch64_set_lane (gimple_call_lhs (stmt), elt,
                                 gimple_call_arg (stmt, 1),
                                 gimple_call_arg (stmt, 2));

      return aarch64_fold_store (stmt, types[1].type ());

    case UNSPEC_ST1_LANE:
        tree val = aarch64_get_lane (gimple_call_arg (stmt, 1),
                                     gimple_call_arg (stmt, 2));
        tree mem = aarch64_dereference (gimple_call_arg (stmt, 0),
                                        TREE_TYPE (types[0].type ()));
        val = aarch64_force_gimple_val (gsi, val);
        return aarch64_copy_vops (gimple_build_assign (mem, val), stmt);

      return aarch64_fold_permute (stmt, 2, aarch64_trn_index, 0);
      return aarch64_fold_permute (stmt, 2, aarch64_trn_index, 1);
      return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 0);
      return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 1);

    case UNSPEC_VCREATE:
      return gimple_build_assign (gimple_call_lhs (stmt),
                                  fold_build1 (VIEW_CONVERT_EXPR,
                                               gimple_call_arg (stmt, 0)));

    case UNSPEC_VEC_COPY:
        tree elt = aarch64_get_lane (gimple_call_arg (stmt, 2),
                                     gimple_call_arg (stmt, 3));
        elt = aarch64_force_gimple_val (gsi, elt);
        return aarch64_set_lane (gimple_call_lhs (stmt), elt,
                                 gimple_call_arg (stmt, 0),
                                 gimple_call_arg (stmt, 1));

      return aarch64_fold_permute (stmt, 2, aarch64_zip_index, 0);
      return aarch64_fold_permute (stmt, 2, aarch64_zip_index, 1);
/* Try to fold STMT, given that it's a call to the built-in function with
   subcode FCODE.  Return the new statement on success and null on
   failure.  */
aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
                                     gimple_stmt_iterator *gsi)
  gimple *new_stmt = NULL;
  unsigned nargs = gimple_call_num_args (stmt);
  tree *args = (nargs > 0
                ? gimple_call_arg_ptr (stmt, 0)
                : &error_mark_node);

  /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
     and unsigned int; it will distinguish according to the types of
     the arguments to the __builtin.  */
      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
      BUILTIN_VDQ_I (UNOPU, reduc_plus_scal_, 10, DEFAULT)
        new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));

      /* Lower sqrt builtins to gimple/internal function sqrt.  */
      BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
        new_stmt = gimple_build_call_internal (IFN_SQRT,
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));

      BUILTIN_VDC (BINOP, combine, 0, QUIET)
      BUILTIN_VD_I (BINOPU, combine, 0, DEFAULT)
      BUILTIN_VDC_P (BINOPP, combine, 0, DEFAULT)
        new_stmt = aarch64_fold_combine (stmt);

      /* Lower store and load neon builtins to gimple.  */
      BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
      BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
      BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
          enum aarch64_simd_type mem_type
            = get_mem_type_for_load_store (fcode);
          aarch64_simd_type_info_trees simd_type
            = aarch64_simd_types_trees[mem_type];
          new_stmt = aarch64_fold_load (stmt, simd_type.itype);

      BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
      BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
      BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
          enum aarch64_simd_type mem_type
            = get_mem_type_for_load_store (fcode);
          aarch64_simd_type_info_trees simd_type
            = aarch64_simd_types_trees[mem_type];
          new_stmt = aarch64_fold_store (stmt, simd_type.itype);

      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
        new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
        new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));

      BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, DEFAULT)
        if (TREE_CODE (args[1]) == INTEGER_CST
            && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
          new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                          LSHIFT_EXPR, args[0], args[1]);

      /* Lower saturating add/sub neon builtins to gimple.  */
      BUILTIN_VSDQ_I (BINOP, ssadd, 3, DEFAULT)
      BUILTIN_VSDQ_I (BINOPU, usadd, 3, DEFAULT)
        new_stmt = gimple_build_call_internal (IFN_SAT_ADD, 2, args[0], args[1]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
      BUILTIN_VSDQ_I (BINOP, sssub, 3, DEFAULT)
      BUILTIN_VSDQ_I (BINOPU, ussub, 3, DEFAULT)
        new_stmt = gimple_build_call_internal (IFN_SAT_SUB, 2, args[0], args[1]);
        gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));

      BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, DEFAULT)
      BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, DEFAULT)
          tree ctype = TREE_TYPE (cst);
          /* Left shifts can be both scalar or vector, e.g. uint64x1_t is
             treated as a scalar type not a vector one.  */
          if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
              wide_int wcst = wi::to_wide (cst);
              tree unit_ty = TREE_TYPE (cst);

              wide_int abs_cst = wi::abs (wcst);
              if (wi::geu_p (abs_cst, element_precision (args[0])))

              if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
                  final_cst = wide_int_to_tree (unit_ty, abs_cst);
                  if (TREE_CODE (cst) != INTEGER_CST)
                    final_cst = build_uniform_cst (ctype, final_cst);

                  new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                  RSHIFT_EXPR, args[0],
                new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                LSHIFT_EXPR, args[0], args[1]);

      BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, DEFAULT)
      VAR1 (SHIFTIMM, ashr_simd, 0, DEFAULT, di)
      BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, DEFAULT)
      VAR1 (USHIFTIMM, lshr_simd, 0, DEFAULT, di)
        if (TREE_CODE (args[1]) == INTEGER_CST
            && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
          new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                          RSHIFT_EXPR, args[0], args[1]);

      BUILTIN_GPF (BINOP, fmulx, 0, ALL)
          gcc_assert (nargs == 2);
          bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
          bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
          if (a0_cst_p || a1_cst_p)
              if (a0_cst_p && a1_cst_p)
                  tree t0 = TREE_TYPE (args[0]);
                  real_value a0 = (TREE_REAL_CST (args[0]));
                  real_value a1 = (TREE_REAL_CST (args[1]));
                  if (real_equal (&a1, &dconst0))
                  /* According to real_equal (), +0 equals -0.  */
                  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
                      real_value res = dconst2;
                      res.sign = a0.sign ^ a1.sign;
                      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
                                                      build_real (t0, res));
                    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
              else /* a0_cst_p ^ a1_cst_p.  */
                  real_value const_part = a0_cst_p
                    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
                  if (!real_equal (&const_part, &dconst0)
                      && !real_isinf (&const_part))
                    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),

              gimple_set_vuse (new_stmt, gimple_vuse (stmt));
              gimple_set_vdef (new_stmt, gimple_vdef (stmt));

    case AARCH64_SIMD_BUILTIN_LANE_CHECK:
      if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
          unlink_stmt_vdef (stmt);
          release_defs (stmt);
          new_stmt = gimple_build_nop ();

      if (auto builtin_data = aarch64_get_pragma_builtin (fcode))
        new_stmt = aarch64_gimple_fold_pragma_builtin (stmt, gsi,

  /* GIMPLE assign statements (unlike calls) require a non-null lhs.  If we
     created an assign statement with a null lhs, then fix this by assigning
     to a new (and subsequently unused) variable.  */
  if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs (new_stmt))
      tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
      gimple_assign_set_lhs (new_stmt, new_lhs);
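/* For instance, a call to the vqaddq_u32 builtin is rewritten here into the
   target-independent internal function .SAT_ADD (a, b), and vaddvq_s32 into
   .REDUC_PLUS (a), so that later GIMPLE passes can reason about these
   operations directly instead of treating them as opaque target calls.  */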
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
                                                        | AARCH64_FE_DIVBYZERO
                                                        | AARCH64_FE_OVERFLOW
                                                        | AARCH64_FE_UNDERFLOW
                                                        | AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv;
  tree get_fpsr, set_fpsr;
  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

       Now set all exceptions to non-stop:
       unsigned int mask_cr
         = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

       And clear all exception flags:
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_cr (masked_cr);
       __builtin_aarch64_set_sr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
                           ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_cr, build_call_expr (get_fpcr, 0),
                       NULL_TREE, NULL_TREE);
  ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
                       fenv_sr, build_call_expr (get_fpsr, 0),
                       NULL_TREE, NULL_TREE);

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
                  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),

  /* Store the value of masked_fenv to clear the exceptions:
     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
                        new_fenv_var, build_call_expr (get_fpsr, 0),
                        NULL_TREE, NULL_TREE);
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
                                 fold_convert (integer_type_node, new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
                    build2 (COMPOUND_EXPR, void_type_node,
                            reload_fenv, restore_fnenv), update_call);
/* Resolve overloaded MEMTAG built-in functions.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

aarch64_resolve_overloaded_memtag (location_t loc,
                                   tree fndecl, void *pass_params)
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length() : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  tree inittype = aarch64_memtag_builtin_data[
                    fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  if (param_num != arg_num)
      TREE_TYPE (fndecl) = inittype;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)

      if (TYPE_MODE (t0) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
                    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      if (TYPE_MODE (t1) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
                    (int)tree_to_shwi (DECL_SIZE ((*params)[1])));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);

      tree t0 = TREE_TYPE ((*params)[0]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
          TREE_TYPE (fndecl) = inittype;

      if (TYPE_MODE (t0) != DImode)
        warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
                    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

        case AARCH64_MEMTAG_BUILTIN_IRG:
          retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
        case AARCH64_MEMTAG_BUILTIN_GMI:
          retype = build_function_type_list (uint64_type_node, t0,
                                             uint64_type_node, NULL);
        case AARCH64_MEMTAG_BUILTIN_INC_TAG:
          retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
        case AARCH64_MEMTAG_BUILTIN_SET_TAG:
          retype = build_function_type_list (void_type_node, t0, NULL);
        case AARCH64_MEMTAG_BUILTIN_GET_TAG:
          retype = build_function_type_list (t0, t0, NULL);

  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
    TREE_TYPE (fndecl) = retype;
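/* For example, for the GET_TAG builtin the argument's own pointer type is
   propagated into the rebuilt signature, so calling it with an int * yields
   an int * result rather than the generic pointer type of the initial
   declaration; SUBP is rebuilt to return ptrdiff_t from its two pointer
   arguments.  */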
/* Called at aarch64_resolve_overloaded_builtin in aarch64-c.cc.  */
aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_resolve_overloaded_memtag (loc, function, pass_params);

#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT

#include "gt-aarch64-builtins.h"