1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
53 #ifndef CHECK_STACK_LIMIT
54 #define CHECK_STACK_LIMIT (-1)
55 #endif
57 /* Return index of given mode in mult and division cost tables. */
58 #define MODE_INDEX(mode) \
59 ((mode) == QImode ? 0 \
60 : (mode) == HImode ? 1 \
61 : (mode) == SImode ? 2 \
62 : (mode) == DImode ? 3 \
63 : 4)
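/* For example, MODE_INDEX (SImode) == 2, so indexing one of the five-element
   "cost of starting a multiply" or "cost of a divide/mod" arrays below with
   MODE_INDEX (mode) selects the QImode/HImode/SImode/DImode/other entry for
   that mode.  */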
65 /* Processor costs (relative to an add) */
66 static const
67 struct processor_costs size_cost = { /* costs for tuning for size */
68 2, /* cost of an add instruction */
69 3, /* cost of a lea instruction */
70 2, /* variable shift costs */
71 3, /* constant shift costs */
72 {3, 3, 3, 3, 5}, /* cost of starting a multiply */
73 0, /* cost of multiply per each bit set */
74 {3, 3, 3, 3, 5}, /* cost of a divide/mod */
75 3, /* cost of movsx */
76 3, /* cost of movzx */
77 0, /* "large" insn */
78 2, /* MOVE_RATIO */
79 2, /* cost for loading QImode using movzbl */
80 {2, 2, 2}, /* cost of loading integer registers
81 in QImode, HImode and SImode.
82 Relative to reg-reg move (2). */
83 {2, 2, 2}, /* cost of storing integer registers */
84 2, /* cost of reg,reg fld/fst */
85 {2, 2, 2}, /* cost of loading fp registers
86 in SFmode, DFmode and XFmode */
87 {2, 2, 2}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
88 3, /* cost of moving MMX register */
89 {3, 3}, /* cost of loading MMX registers
90 in SImode and DImode */
91 {3, 3}, /* cost of storing MMX registers
92 in SImode and DImode */
93 3, /* cost of moving SSE register */
94 {3, 3, 3}, /* cost of loading SSE registers
95 in SImode, DImode and TImode */
96 {3, 3, 3}, /* cost of storing SSE registers
97 in SImode, DImode and TImode */
98 3, /* MMX or SSE register to integer */
99 0, /* size of prefetch block */
100 0, /* number of parallel prefetches */
101 1, /* Branch cost */
102 2, /* cost of FADD and FSUB insns. */
103 2, /* cost of FMUL instruction. */
104 2, /* cost of FDIV instruction. */
105 2, /* cost of FABS instruction. */
106 2, /* cost of FCHS instruction. */
107 2, /* cost of FSQRT instruction. */
110 /* Processor costs (relative to an add) */
111 static const
112 struct processor_costs i386_cost = { /* 386 specific costs */
113 1, /* cost of an add instruction */
114 1, /* cost of a lea instruction */
115 3, /* variable shift costs */
116 2, /* constant shift costs */
117 {6, 6, 6, 6, 6}, /* cost of starting a multiply */
118 1, /* cost of multiply per each bit set */
119 {23, 23, 23, 23, 23}, /* cost of a divide/mod */
120 3, /* cost of movsx */
121 2, /* cost of movzx */
122 15, /* "large" insn */
123 3, /* MOVE_RATIO */
124 4, /* cost for loading QImode using movzbl */
125 {2, 4, 2}, /* cost of loading integer registers
126 in QImode, HImode and SImode.
127 Relative to reg-reg move (2). */
128 {2, 4, 2}, /* cost of storing integer registers */
129 2, /* cost of reg,reg fld/fst */
130 {8, 8, 8}, /* cost of loading fp registers
131 in SFmode, DFmode and XFmode */
132 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
133 2, /* cost of moving MMX register */
134 {4, 8}, /* cost of loading MMX registers
135 in SImode and DImode */
136 {4, 8}, /* cost of storing MMX registers
137 in SImode and DImode */
138 2, /* cost of moving SSE register */
139 {4, 8, 16}, /* cost of loading SSE registers
140 in SImode, DImode and TImode */
141 {4, 8, 16}, /* cost of storing SSE registers
142 in SImode, DImode and TImode */
143 3, /* MMX or SSE register to integer */
144 0, /* size of prefetch block */
145 0, /* number of parallel prefetches */
146 1, /* Branch cost */
147 23, /* cost of FADD and FSUB insns. */
148 27, /* cost of FMUL instruction. */
149 88, /* cost of FDIV instruction. */
150 22, /* cost of FABS instruction. */
151 24, /* cost of FCHS instruction. */
152 122, /* cost of FSQRT instruction. */
155 static const
156 struct processor_costs i486_cost = { /* 486 specific costs */
157 1, /* cost of an add instruction */
158 1, /* cost of a lea instruction */
159 3, /* variable shift costs */
160 2, /* constant shift costs */
161 {12, 12, 12, 12, 12}, /* cost of starting a multiply */
162 1, /* cost of multiply per each bit set */
163 {40, 40, 40, 40, 40}, /* cost of a divide/mod */
164 3, /* cost of movsx */
165 2, /* cost of movzx */
166 15, /* "large" insn */
167 3, /* MOVE_RATIO */
168 4, /* cost for loading QImode using movzbl */
169 {2, 4, 2}, /* cost of loading integer registers
170 in QImode, HImode and SImode.
171 Relative to reg-reg move (2). */
172 {2, 4, 2}, /* cost of storing integer registers */
173 2, /* cost of reg,reg fld/fst */
174 {8, 8, 8}, /* cost of loading fp registers
175 in SFmode, DFmode and XFmode */
176 {8, 8, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
177 2, /* cost of moving MMX register */
178 {4, 8}, /* cost of loading MMX registers
179 in SImode and DImode */
180 {4, 8}, /* cost of storing MMX registers
181 in SImode and DImode */
182 2, /* cost of moving SSE register */
183 {4, 8, 16}, /* cost of loading SSE registers
184 in SImode, DImode and TImode */
185 {4, 8, 16}, /* cost of storing SSE registers
186 in SImode, DImode and TImode */
187 3, /* MMX or SSE register to integer */
188 0, /* size of prefetch block */
189 0, /* number of parallel prefetches */
190 1, /* Branch cost */
191 8, /* cost of FADD and FSUB insns. */
192 16, /* cost of FMUL instruction. */
193 73, /* cost of FDIV instruction. */
194 3, /* cost of FABS instruction. */
195 3, /* cost of FCHS instruction. */
196 83, /* cost of FSQRT instruction. */
199 static const
200 struct processor_costs pentium_cost = {
201 1, /* cost of an add instruction */
202 1, /* cost of a lea instruction */
203 4, /* variable shift costs */
204 1, /* constant shift costs */
205 {11, 11, 11, 11, 11}, /* cost of starting a multiply */
206 0, /* cost of multiply per each bit set */
207 {25, 25, 25, 25, 25}, /* cost of a divide/mod */
208 3, /* cost of movsx */
209 2, /* cost of movzx */
210 8, /* "large" insn */
211 6, /* MOVE_RATIO */
212 6, /* cost for loading QImode using movzbl */
213 {2, 4, 2}, /* cost of loading integer registers
214 in QImode, HImode and SImode.
215 Relative to reg-reg move (2). */
216 {2, 4, 2}, /* cost of storing integer registers */
217 2, /* cost of reg,reg fld/fst */
218 {2, 2, 6}, /* cost of loading fp registers
219 in SFmode, DFmode and XFmode */
220 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
221 8, /* cost of moving MMX register */
222 {8, 8}, /* cost of loading MMX registers
223 in SImode and DImode */
224 {8, 8}, /* cost of storing MMX registers
225 in SImode and DImode */
226 2, /* cost of moving SSE register */
227 {4, 8, 16}, /* cost of loading SSE registers
228 in SImode, DImode and TImode */
229 {4, 8, 16}, /* cost of storing SSE registers
230 in SImode, DImode and TImode */
231 3, /* MMX or SSE register to integer */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
234 2, /* Branch cost */
235 3, /* cost of FADD and FSUB insns. */
236 3, /* cost of FMUL instruction. */
237 39, /* cost of FDIV instruction. */
238 1, /* cost of FABS instruction. */
239 1, /* cost of FCHS instruction. */
240 70, /* cost of FSQRT instruction. */
243 static const
244 struct processor_costs pentiumpro_cost = {
245 1, /* cost of an add instruction */
246 1, /* cost of a lea instruction */
247 1, /* variable shift costs */
248 1, /* constant shift costs */
249 {4, 4, 4, 4, 4}, /* cost of starting a multiply */
250 0, /* cost of multiply per each bit set */
251 {17, 17, 17, 17, 17}, /* cost of a divide/mod */
252 1, /* cost of movsx */
253 1, /* cost of movzx */
254 8, /* "large" insn */
255 6, /* MOVE_RATIO */
256 2, /* cost for loading QImode using movzbl */
257 {4, 4, 4}, /* cost of loading integer registers
258 in QImode, HImode and SImode.
259 Relative to reg-reg move (2). */
260 {2, 2, 2}, /* cost of storing integer registers */
261 2, /* cost of reg,reg fld/fst */
262 {2, 2, 6}, /* cost of loading fp registers
263 in SFmode, DFmode and XFmode */
264 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
265 2, /* cost of moving MMX register */
266 {2, 2}, /* cost of loading MMX registers
267 in SImode and DImode */
268 {2, 2}, /* cost of storing MMX registers
269 in SImode and DImode */
270 2, /* cost of moving SSE register */
271 {2, 2, 8}, /* cost of loading SSE registers
272 in SImode, DImode and TImode */
273 {2, 2, 8}, /* cost of storing SSE registers
274 in SImode, DImode and TImode */
275 3, /* MMX or SSE register to integer */
276 32, /* size of prefetch block */
277 6, /* number of parallel prefetches */
278 2, /* Branch cost */
279 3, /* cost of FADD and FSUB insns. */
280 5, /* cost of FMUL instruction. */
281 56, /* cost of FDIV instruction. */
282 2, /* cost of FABS instruction. */
283 2, /* cost of FCHS instruction. */
284 56, /* cost of FSQRT instruction. */
287 static const
288 struct processor_costs k6_cost = {
289 1, /* cost of an add instruction */
290 2, /* cost of a lea instruction */
291 1, /* variable shift costs */
292 1, /* constant shift costs */
293 {3, 3, 3, 3, 3}, /* cost of starting a multiply */
294 0, /* cost of multiply per each bit set */
295 {18, 18, 18, 18, 18}, /* cost of a divide/mod */
296 2, /* cost of movsx */
297 2, /* cost of movzx */
298 8, /* "large" insn */
299 4, /* MOVE_RATIO */
300 3, /* cost for loading QImode using movzbl */
301 {4, 5, 4}, /* cost of loading integer registers
302 in QImode, HImode and SImode.
303 Relative to reg-reg move (2). */
304 {2, 3, 2}, /* cost of storing integer registers */
305 4, /* cost of reg,reg fld/fst */
306 {6, 6, 6}, /* cost of loading fp registers
307 in SFmode, DFmode and XFmode */
308 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
309 2, /* cost of moving MMX register */
310 {2, 2}, /* cost of loading MMX registers
311 in SImode and DImode */
312 {2, 2}, /* cost of storing MMX registers
313 in SImode and DImode */
314 2, /* cost of moving SSE register */
315 {2, 2, 8}, /* cost of loading SSE registers
316 in SImode, DImode and TImode */
317 {2, 2, 8}, /* cost of storing SSE registers
318 in SImode, DImode and TImode */
319 6, /* MMX or SSE register to integer */
320 32, /* size of prefetch block */
321 1, /* number of parallel prefetches */
322 1, /* Branch cost */
323 2, /* cost of FADD and FSUB insns. */
324 2, /* cost of FMUL instruction. */
325 56, /* cost of FDIV instruction. */
326 2, /* cost of FABS instruction. */
327 2, /* cost of FCHS instruction. */
328 56, /* cost of FSQRT instruction. */
331 static const
332 struct processor_costs athlon_cost = {
333 1, /* cost of an add instruction */
334 2, /* cost of a lea instruction */
335 1, /* variable shift costs */
336 1, /* constant shift costs */
337 {5, 5, 5, 5, 5}, /* cost of starting a multiply */
338 0, /* cost of multiply per each bit set */
339 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
340 1, /* cost of movsx */
341 1, /* cost of movzx */
342 8, /* "large" insn */
343 9, /* MOVE_RATIO */
344 4, /* cost for loading QImode using movzbl */
345 {3, 4, 3}, /* cost of loading integer registers
346 in QImode, HImode and SImode.
347 Relative to reg-reg move (2). */
348 {3, 4, 3}, /* cost of storing integer registers */
349 4, /* cost of reg,reg fld/fst */
350 {4, 4, 12}, /* cost of loading fp registers
351 in SFmode, DFmode and XFmode */
352 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
353 2, /* cost of moving MMX register */
354 {4, 4}, /* cost of loading MMX registers
355 in SImode and DImode */
356 {4, 4}, /* cost of storing MMX registers
357 in SImode and DImode */
358 2, /* cost of moving SSE register */
359 {4, 4, 6}, /* cost of loading SSE registers
360 in SImode, DImode and TImode */
361 {4, 4, 5}, /* cost of storing SSE registers
362 in SImode, DImode and TImode */
363 5, /* MMX or SSE register to integer */
364 64, /* size of prefetch block */
365 6, /* number of parallel prefetches */
366 5, /* Branch cost */
367 4, /* cost of FADD and FSUB insns. */
368 4, /* cost of FMUL instruction. */
369 24, /* cost of FDIV instruction. */
370 2, /* cost of FABS instruction. */
371 2, /* cost of FCHS instruction. */
372 35, /* cost of FSQRT instruction. */
375 static const
376 struct processor_costs k8_cost = {
377 1, /* cost of an add instruction */
378 2, /* cost of a lea instruction */
379 1, /* variable shift costs */
380 1, /* constant shift costs */
381 {3, 4, 3, 4, 5}, /* cost of starting a multiply */
382 0, /* cost of multiply per each bit set */
383 {18, 26, 42, 74, 74}, /* cost of a divide/mod */
384 1, /* cost of movsx */
385 1, /* cost of movzx */
386 8, /* "large" insn */
387 9, /* MOVE_RATIO */
388 4, /* cost for loading QImode using movzbl */
389 {3, 4, 3}, /* cost of loading integer registers
390 in QImode, HImode and SImode.
391 Relative to reg-reg move (2). */
392 {3, 4, 3}, /* cost of storing integer registers */
393 4, /* cost of reg,reg fld/fst */
394 {4, 4, 12}, /* cost of loading fp registers
395 in SFmode, DFmode and XFmode */
396 {6, 6, 8}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
397 2, /* cost of moving MMX register */
398 {3, 3}, /* cost of loading MMX registers
399 in SImode and DImode */
400 {4, 4}, /* cost of storing MMX registers
401 in SImode and DImode */
402 2, /* cost of moving SSE register */
403 {4, 3, 6}, /* cost of loading SSE registers
404 in SImode, DImode and TImode */
405 {4, 4, 5}, /* cost of storing SSE registers
406 in SImode, DImode and TImode */
407 5, /* MMX or SSE register to integer */
408 64, /* size of prefetch block */
409 6, /* number of parallel prefetches */
410 5, /* Branch cost */
411 4, /* cost of FADD and FSUB insns. */
412 4, /* cost of FMUL instruction. */
413 19, /* cost of FDIV instruction. */
414 2, /* cost of FABS instruction. */
415 2, /* cost of FCHS instruction. */
416 35, /* cost of FSQRT instruction. */
419 static const
420 struct processor_costs pentium4_cost = {
421 1, /* cost of an add instruction */
422 3, /* cost of a lea instruction */
423 4, /* variable shift costs */
424 4, /* constant shift costs */
425 {15, 15, 15, 15, 15}, /* cost of starting a multiply */
426 0, /* cost of multiply per each bit set */
427 {56, 56, 56, 56, 56}, /* cost of a divide/mod */
428 1, /* cost of movsx */
429 1, /* cost of movzx */
430 16, /* "large" insn */
431 6, /* MOVE_RATIO */
432 2, /* cost for loading QImode using movzbl */
433 {4, 5, 4}, /* cost of loading integer registers
434 in QImode, HImode and SImode.
435 Relative to reg-reg move (2). */
436 {2, 3, 2}, /* cost of storing integer registers */
437 2, /* cost of reg,reg fld/fst */
438 {2, 2, 6}, /* cost of loading fp registers
439 in SFmode, DFmode and XFmode */
440 {4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
441 2, /* cost of moving MMX register */
442 {2, 2}, /* cost of loading MMX registers
443 in SImode and DImode */
444 {2, 2}, /* cost of storing MMX registers
445 in SImode and DImode */
446 12, /* cost of moving SSE register */
447 {12, 12, 12}, /* cost of loading SSE registers
448 in SImode, DImode and TImode */
449 {2, 2, 8}, /* cost of storing SSE registers
450 in SImode, DImode and TImode */
451 10, /* MMX or SSE register to integer */
452 64, /* size of prefetch block */
453 6, /* number of parallel prefetches */
454 2, /* Branch cost */
455 5, /* cost of FADD and FSUB insns. */
456 7, /* cost of FMUL instruction. */
457 43, /* cost of FDIV instruction. */
458 2, /* cost of FABS instruction. */
459 2, /* cost of FCHS instruction. */
460 43, /* cost of FSQRT instruction. */
463 static const
464 struct processor_costs nocona_cost = {
465 1, /* cost of an add instruction */
466 1, /* cost of a lea instruction */
467 1, /* variable shift costs */
468 1, /* constant shift costs */
469 {10, 10, 10, 10, 10}, /* cost of starting a multiply */
470 0, /* cost of multiply per each bit set */
471 {66, 66, 66, 66, 66}, /* cost of a divide/mod */
472 1, /* cost of movsx */
473 1, /* cost of movzx */
474 16, /* "large" insn */
475 17, /* MOVE_RATIO */
476 4, /* cost for loading QImode using movzbl */
477 {4, 4, 4}, /* cost of loading integer registers
478 in QImode, HImode and SImode.
479 Relative to reg-reg move (2). */
480 {4, 4, 4}, /* cost of storing integer registers */
481 3, /* cost of reg,reg fld/fst */
482 {12, 12, 12}, /* cost of loading fp registers
483 in SFmode, DFmode and XFmode */
484 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
485 6, /* cost of moving MMX register */
486 {12, 12}, /* cost of loading MMX registers
487 in SImode and DImode */
488 {12, 12}, /* cost of storing MMX registers
489 in SImode and DImode */
490 6, /* cost of moving SSE register */
491 {12, 12, 12}, /* cost of loading SSE registers
492 in SImode, DImode and TImode */
493 {12, 12, 12}, /* cost of storing SSE registers
494 in SImode, DImode and TImode */
495 8, /* MMX or SSE register to integer */
496 128, /* size of prefetch block */
497 8, /* number of parallel prefetches */
498 1, /* Branch cost */
499 6, /* cost of FADD and FSUB insns. */
500 8, /* cost of FMUL instruction. */
501 40, /* cost of FDIV instruction. */
502 3, /* cost of FABS instruction. */
503 3, /* cost of FCHS instruction. */
504 44, /* cost of FSQRT instruction. */
507 const struct processor_costs *ix86_cost = &pentium_cost;
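/* Note: override_options () below re-points ix86_cost at the cost table for
   the -mtune selection, or at size_cost when optimizing for size, so this
   initializer is only the default used before option processing runs.  */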
509 /* Processor feature/optimization bitmasks. */
510 #define m_386 (1<<PROCESSOR_I386)
511 #define m_486 (1<<PROCESSOR_I486)
512 #define m_PENT (1<<PROCESSOR_PENTIUM)
513 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
514 #define m_K6 (1<<PROCESSOR_K6)
515 #define m_ATHLON (1<<PROCESSOR_ATHLON)
516 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
517 #define m_K8 (1<<PROCESSOR_K8)
518 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
519 #define m_NOCONA (1<<PROCESSOR_NOCONA)
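/* Each tuning flag below is a bitmask over the PROCESSOR_* values above; a
   feature applies to the active -mtune CPU when the flag has the bit
   (1 << ix86_tune) set.  (The TARGET_* convenience macros in i386.h are
   assumed to expand to exactly such a test.)  */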
521 const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
522 const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
523 const int x86_zero_extend_with_and = m_486 | m_PENT;
524 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
525 const int x86_double_with_add = ~m_386;
526 const int x86_use_bit_test = m_386;
527 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
528 const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
529 const int x86_fisttp = m_NOCONA;
530 const int x86_3dnow_a = m_ATHLON_K8;
531 const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
532 /* Branch hints were put in P4 based on simulation results. But
533 after P4 was made, no performance benefit was observed with
534 branch hints. They also increase the code size. As a result,
535 icc never generates branch hints. */
536 const int x86_branch_hints = 0;
537 const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
538 const int x86_partial_reg_stall = m_PPRO;
539 const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
540 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
541 const int x86_use_mov0 = m_K6;
542 const int x86_use_cltd = ~(m_PENT | m_K6);
543 const int x86_read_modify_write = ~m_PENT;
544 const int x86_read_modify = ~(m_PENT | m_PPRO);
545 const int x86_split_long_moves = m_PPRO;
546 const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
547 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
548 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
549 const int x86_qimode_math = ~(0);
550 const int x86_promote_qi_regs = 0;
551 const int x86_himode_math = ~(m_PPRO);
552 const int x86_promote_hi_regs = m_PPRO;
553 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
554 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
555 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
556 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
557 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
558 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
559 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
560 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
561 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
562 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
563 const int x86_decompose_lea = m_PENT4 | m_NOCONA;
564 const int x86_shift1 = ~m_486;
565 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
566 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
567 /* Set for machines where the type and dependencies are resolved on SSE
568 register parts instead of whole registers, so we may maintain just the
569 lower part of scalar values in the proper format, leaving the upper part
570 undefined. */
571 const int x86_sse_split_regs = m_ATHLON_K8;
572 const int x86_sse_typeless_stores = m_ATHLON_K8;
573 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
574 const int x86_use_ffreep = m_ATHLON_K8;
575 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
577 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
578 integer data in xmm registers, which results in pretty abysmal code. */
579 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
581 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
582 /* Some CPU cores are not able to predict more than 4 branch instructions in
583 the 16 byte window. */
584 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
585 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
586 const int x86_use_bt = m_ATHLON_K8;
587 /* Compare and exchange was added for 80486. */
588 const int x86_cmpxchg = ~m_386;
589 /* Exchange and add was added for 80486. */
590 const int x86_xadd = ~m_386;
592 /* In case the average insn count for a single function invocation is
593 lower than this constant, emit fast (but longer) prologue and
594 epilogue code. */
595 #define FAST_PROLOGUE_INSN_COUNT 20
597 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
598 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
599 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
600 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
602 /* Array of the smallest class containing reg number REGNO, indexed by
603 REGNO. Used by REGNO_REG_CLASS in i386.h. */
605 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
607 /* ax, dx, cx, bx */
608 AREG, DREG, CREG, BREG,
609 /* si, di, bp, sp */
610 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
611 /* FP registers */
612 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
613 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
614 /* arg pointer */
615 NON_Q_REGS,
616 /* flags, fpsr, dirflag, frame */
617 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
618 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
619 SSE_REGS, SSE_REGS,
620 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
621 MMX_REGS, MMX_REGS,
622 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
623 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
624 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
625 SSE_REGS, SSE_REGS,
628 /* The "default" register map used in 32bit mode. */
630 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
632 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
633 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
634 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
635 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
636 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
637 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
638 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
641 static int const x86_64_int_parameter_registers[6] =
643 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
644 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
647 static int const x86_64_int_return_registers[4] =
649 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
652 /* The "default" register map used in 64bit mode. */
653 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
655 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
656 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
657 -1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
658 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
659 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
660 8,9,10,11,12,13,14,15, /* extended integer registers */
661 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
664 /* Define the register numbers to be used in Dwarf debugging information.
665 The SVR4 reference port C compiler uses the following register numbers
666 in its Dwarf output code:
667 0 for %eax (gcc regno = 0)
668 1 for %ecx (gcc regno = 2)
669 2 for %edx (gcc regno = 1)
670 3 for %ebx (gcc regno = 3)
671 4 for %esp (gcc regno = 7)
672 5 for %ebp (gcc regno = 6)
673 6 for %esi (gcc regno = 4)
674 7 for %edi (gcc regno = 5)
675 The following three DWARF register numbers are never generated by
676 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
677 believes these numbers have these meanings.
678 8 for %eip (no gcc equivalent)
679 9 for %eflags (gcc regno = 17)
680 10 for %trapno (no gcc equivalent)
681 It is not at all clear how we should number the FP stack registers
682 for the x86 architecture. If the version of SDB on x86/svr4 were
683 a bit less brain dead with respect to floating-point then we would
684 have a precedent to follow with respect to DWARF register numbers
685 for x86 FP registers, but the SDB on x86/svr4 is so completely
686 broken with respect to FP registers that it is hardly worth thinking
687 of it as something to strive for compatibility with.
688 The version of x86/svr4 SDB I have at the moment does (partially)
689 seem to believe that DWARF register number 11 is associated with
690 the x86 register %st(0), but that's about all. Higher DWARF
691 register numbers don't seem to be associated with anything in
692 particular, and even for DWARF regno 11, SDB only seems to under-
693 stand that it should say that a variable lives in %st(0) (when
694 asked via an `=' command) if we said it was in DWARF regno 11,
695 but SDB still prints garbage when asked for the value of the
696 variable in question (via a `/' command).
697 (Also note that the labels SDB prints for various FP stack regs
698 when doing an `x' command are all wrong.)
699 Note that these problems generally don't affect the native SVR4
700 C compiler because it doesn't allow the use of -O with -g and
701 because when it is *not* optimizing, it allocates a memory
702 location for each floating-point variable, and the memory
703 location is what gets described in the DWARF AT_location
704 attribute for the variable in question.
705 Regardless of the severe mental illness of the x86/svr4 SDB, we
706 do something sensible here and we use the following DWARF
707 register numbers. Note that these are all stack-top-relative
708 numbers.
709 11 for %st(0) (gcc regno = 8)
710 12 for %st(1) (gcc regno = 9)
711 13 for %st(2) (gcc regno = 10)
712 14 for %st(3) (gcc regno = 11)
713 15 for %st(4) (gcc regno = 12)
714 16 for %st(5) (gcc regno = 13)
715 17 for %st(6) (gcc regno = 14)
716 18 for %st(7) (gcc regno = 15)
718 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
720 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
721 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
722 -1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
723 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
724 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
725 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
726 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
729 /* Test and compare insns in i386.md store the information needed to
730 generate branch and scc insns here. */
732 rtx ix86_compare_op0 = NULL_RTX;
733 rtx ix86_compare_op1 = NULL_RTX;
734 rtx ix86_compare_emitted = NULL_RTX;
736 /* Size of the register save area. */
737 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
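/* Rough illustration, assuming the 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8 from i386.h:
   X86_64_VARARGS_SIZE = 6 * 8 + 8 * 16 = 176 bytes, the register save area
   size the x86-64 psABI prescribes for varargs functions.  */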
739 /* Define the structure for the machine field in struct function. */
741 struct stack_local_entry GTY(())
743 unsigned short mode;
744 unsigned short n;
745 rtx rtl;
746 struct stack_local_entry *next;
749 /* Structure describing stack frame layout.
750 Stack grows downward:
752 [arguments]
753 <- ARG_POINTER
754 saved pc
756 saved frame pointer if frame_pointer_needed
757 <- HARD_FRAME_POINTER
758 [saved regs]
760 [padding1] \
762 [va_arg registers] (
763 > to_allocate <- FRAME_POINTER
764 [frame] (
766 [padding2] /
768 struct ix86_frame
770 int nregs;
771 int padding1;
772 int va_arg_size;
773 HOST_WIDE_INT frame;
774 int padding2;
775 int outgoing_arguments_size;
776 int red_zone_size;
778 HOST_WIDE_INT to_allocate;
779 /* The offsets relative to ARG_POINTER. */
780 HOST_WIDE_INT frame_pointer_offset;
781 HOST_WIDE_INT hard_frame_pointer_offset;
782 HOST_WIDE_INT stack_pointer_offset;
784 /* When save_regs_using_mov is set, emit prologue using
785 move instead of push instructions. */
786 bool save_regs_using_mov;
789 /* Code model option. */
790 enum cmodel ix86_cmodel;
791 /* Asm dialect. */
792 enum asm_dialect ix86_asm_dialect = ASM_ATT;
793 /* TLS dialect. */
794 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
796 /* Which unit we are generating floating point math for. */
797 enum fpmath_unit ix86_fpmath;
799 /* Which CPU we are scheduling for. */
800 enum processor_type ix86_tune;
801 /* Which instruction set architecture to use. */
802 enum processor_type ix86_arch;
804 /* True if the SSE prefetch instruction is not a NOP. */
805 int x86_prefetch_sse;
807 /* ix86_regparm_string as a number */
808 static int ix86_regparm;
810 /* Preferred alignment for stack boundary in bits. */
811 unsigned int ix86_preferred_stack_boundary;
813 /* Values 1-5: see jump.c */
814 int ix86_branch_cost;
816 /* Variables which are this size or smaller are put in the data/bss
817 or ldata/lbss sections. */
819 int ix86_section_threshold = 65536;
821 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
822 char internal_label_prefix[16];
823 int internal_label_prefix_len;
825 static bool ix86_handle_option (size_t, const char *, int);
826 static void output_pic_addr_const (FILE *, rtx, int);
827 static void put_condition_code (enum rtx_code, enum machine_mode,
828 int, int, FILE *);
829 static const char *get_some_local_dynamic_name (void);
830 static int get_some_local_dynamic_name_1 (rtx *, void *);
831 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
832 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
833 rtx *);
834 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
835 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
836 enum machine_mode);
837 static rtx get_thread_pointer (int);
838 static rtx legitimize_tls_address (rtx, enum tls_model, int);
839 static void get_pc_thunk_name (char [32], unsigned int);
840 static rtx gen_push (rtx);
841 static int ix86_flags_dependant (rtx, rtx, enum attr_type);
842 static int ix86_agi_dependant (rtx, rtx, enum attr_type);
843 static struct machine_function * ix86_init_machine_status (void);
844 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
845 static int ix86_nsaved_regs (void);
846 static void ix86_emit_save_regs (void);
847 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
848 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
849 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
850 static HOST_WIDE_INT ix86_GOT_alias_set (void);
851 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
852 static rtx ix86_expand_aligntest (rtx, int);
853 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
854 static int ix86_issue_rate (void);
855 static int ix86_adjust_cost (rtx, rtx, rtx, int);
856 static int ia32_multipass_dfa_lookahead (void);
857 static void ix86_init_mmx_sse_builtins (void);
858 static rtx x86_this_parameter (tree);
859 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
860 HOST_WIDE_INT, tree);
861 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
862 static void x86_file_start (void);
863 static void ix86_reorg (void);
864 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
865 static tree ix86_build_builtin_va_list (void);
866 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
867 tree, int *, int);
868 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
869 static bool ix86_vector_mode_supported_p (enum machine_mode);
871 static int ix86_address_cost (rtx);
872 static bool ix86_cannot_force_const_mem (rtx);
873 static rtx ix86_delegitimize_address (rtx);
875 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
877 struct builtin_description;
878 static rtx ix86_expand_sse_comi (const struct builtin_description *,
879 tree, rtx);
880 static rtx ix86_expand_sse_compare (const struct builtin_description *,
881 tree, rtx);
882 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
883 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
884 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
885 static rtx ix86_expand_store_builtin (enum insn_code, tree);
886 static rtx safe_vector_operand (rtx, enum machine_mode);
887 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
888 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
889 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
890 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
891 static int ix86_fp_comparison_cost (enum rtx_code code);
892 static unsigned int ix86_select_alt_pic_regnum (void);
893 static int ix86_save_reg (unsigned int, int);
894 static void ix86_compute_frame_layout (struct ix86_frame *);
895 static int ix86_comp_type_attributes (tree, tree);
896 static int ix86_function_regparm (tree, tree);
897 const struct attribute_spec ix86_attribute_table[];
898 static bool ix86_function_ok_for_sibcall (tree, tree);
899 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
900 static int ix86_value_regno (enum machine_mode, tree, tree);
901 static bool contains_128bit_aligned_vector_p (tree);
902 static rtx ix86_struct_value_rtx (tree, int);
903 static bool ix86_ms_bitfield_layout_p (tree);
904 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
905 static int extended_reg_mentioned_1 (rtx *, void *);
906 static bool ix86_rtx_costs (rtx, int, int, int *);
907 static int min_insn_size (rtx);
908 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
909 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
910 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
911 tree, bool);
912 static void ix86_init_builtins (void);
913 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
914 static const char *ix86_mangle_fundamental_type (tree);
915 static tree ix86_stack_protect_fail (void);
916 static rtx ix86_internal_arg_pointer (void);
917 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
919 /* This function is only used on Solaris. */
920 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
921 ATTRIBUTE_UNUSED;
923 /* Register class used for passing a given 64-bit part of the argument.
924 These represent classes as documented by the psABI, with the exception
925 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
926 uses an SF or DFmode move instead of a DImode move to avoid reformatting penalties.
928 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
929 whenever possible (the upper half then contains only padding).
931 enum x86_64_reg_class
933 X86_64_NO_CLASS,
934 X86_64_INTEGER_CLASS,
935 X86_64_INTEGERSI_CLASS,
936 X86_64_SSE_CLASS,
937 X86_64_SSESF_CLASS,
938 X86_64_SSEDF_CLASS,
939 X86_64_SSEUP_CLASS,
940 X86_64_X87_CLASS,
941 X86_64_X87UP_CLASS,
942 X86_64_COMPLEX_X87_CLASS,
943 X86_64_MEMORY_CLASS
945 static const char * const x86_64_reg_class_name[] = {
946 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
947 "sseup", "x87", "x87up", "cplx87", "no"
950 #define MAX_CLASSES 4
952 /* Table of constants used by fldpi, fldln2, etc.... */
953 static REAL_VALUE_TYPE ext_80387_constants_table [5];
954 static bool ext_80387_constants_init = 0;
955 static void init_ext_80387_constants (void);
956 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
957 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
958 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
959 static void x86_64_elf_select_section (tree decl, int reloc,
960 unsigned HOST_WIDE_INT align)
961 ATTRIBUTE_UNUSED;
963 /* Initialize the GCC target structure. */
964 #undef TARGET_ATTRIBUTE_TABLE
965 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
966 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
967 # undef TARGET_MERGE_DECL_ATTRIBUTES
968 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
969 #endif
971 #undef TARGET_COMP_TYPE_ATTRIBUTES
972 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
974 #undef TARGET_INIT_BUILTINS
975 #define TARGET_INIT_BUILTINS ix86_init_builtins
976 #undef TARGET_EXPAND_BUILTIN
977 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
979 #undef TARGET_ASM_FUNCTION_EPILOGUE
980 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
982 #undef TARGET_ENCODE_SECTION_INFO
983 #ifndef SUBTARGET_ENCODE_SECTION_INFO
984 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
985 #else
986 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
987 #endif
989 #undef TARGET_ASM_OPEN_PAREN
990 #define TARGET_ASM_OPEN_PAREN ""
991 #undef TARGET_ASM_CLOSE_PAREN
992 #define TARGET_ASM_CLOSE_PAREN ""
994 #undef TARGET_ASM_ALIGNED_HI_OP
995 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
996 #undef TARGET_ASM_ALIGNED_SI_OP
997 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
998 #ifdef ASM_QUAD
999 #undef TARGET_ASM_ALIGNED_DI_OP
1000 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1001 #endif
1003 #undef TARGET_ASM_UNALIGNED_HI_OP
1004 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1005 #undef TARGET_ASM_UNALIGNED_SI_OP
1006 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1007 #undef TARGET_ASM_UNALIGNED_DI_OP
1008 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1010 #undef TARGET_SCHED_ADJUST_COST
1011 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1012 #undef TARGET_SCHED_ISSUE_RATE
1013 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1014 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1015 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1016 ia32_multipass_dfa_lookahead
1018 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1019 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1021 #ifdef HAVE_AS_TLS
1022 #undef TARGET_HAVE_TLS
1023 #define TARGET_HAVE_TLS true
1024 #endif
1025 #undef TARGET_CANNOT_FORCE_CONST_MEM
1026 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1028 #undef TARGET_DELEGITIMIZE_ADDRESS
1029 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1031 #undef TARGET_MS_BITFIELD_LAYOUT_P
1032 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1034 #if TARGET_MACHO
1035 #undef TARGET_BINDS_LOCAL_P
1036 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1037 #endif
1039 #undef TARGET_ASM_OUTPUT_MI_THUNK
1040 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1041 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1042 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1044 #undef TARGET_ASM_FILE_START
1045 #define TARGET_ASM_FILE_START x86_file_start
1047 #undef TARGET_DEFAULT_TARGET_FLAGS
1048 #define TARGET_DEFAULT_TARGET_FLAGS \
1049 (TARGET_DEFAULT \
1050 | TARGET_64BIT_DEFAULT \
1051 | TARGET_SUBTARGET_DEFAULT \
1052 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1054 #undef TARGET_HANDLE_OPTION
1055 #define TARGET_HANDLE_OPTION ix86_handle_option
1057 #undef TARGET_RTX_COSTS
1058 #define TARGET_RTX_COSTS ix86_rtx_costs
1059 #undef TARGET_ADDRESS_COST
1060 #define TARGET_ADDRESS_COST ix86_address_cost
1062 #undef TARGET_FIXED_CONDITION_CODE_REGS
1063 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1064 #undef TARGET_CC_MODES_COMPATIBLE
1065 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1067 #undef TARGET_MACHINE_DEPENDENT_REORG
1068 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1070 #undef TARGET_BUILD_BUILTIN_VA_LIST
1071 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1073 #undef TARGET_MD_ASM_CLOBBERS
1074 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1076 #undef TARGET_PROMOTE_PROTOTYPES
1077 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1078 #undef TARGET_STRUCT_VALUE_RTX
1079 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1080 #undef TARGET_SETUP_INCOMING_VARARGS
1081 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1082 #undef TARGET_MUST_PASS_IN_STACK
1083 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1084 #undef TARGET_PASS_BY_REFERENCE
1085 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1086 #undef TARGET_INTERNAL_ARG_POINTER
1087 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1088 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1089 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1091 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1092 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1094 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1095 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1097 #ifdef HAVE_AS_TLS
1098 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1099 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1100 #endif
1102 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1103 #undef TARGET_INSERT_ATTRIBUTES
1104 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1105 #endif
1107 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1108 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1110 #undef TARGET_STACK_PROTECT_FAIL
1111 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1113 #undef TARGET_FUNCTION_VALUE
1114 #define TARGET_FUNCTION_VALUE ix86_function_value
1116 struct gcc_target targetm = TARGET_INITIALIZER;
1119 /* The svr4 ABI for the i386 says that records and unions are returned
1120 in memory. */
1121 #ifndef DEFAULT_PCC_STRUCT_RETURN
1122 #define DEFAULT_PCC_STRUCT_RETURN 1
1123 #endif
1125 /* Implement TARGET_HANDLE_OPTION. */
1127 static bool
1128 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1130 switch (code)
1132 case OPT_m3dnow:
1133 if (!value)
1135 target_flags &= ~MASK_3DNOW_A;
1136 target_flags_explicit |= MASK_3DNOW_A;
1138 return true;
1140 case OPT_mmmx:
1141 if (!value)
1143 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1144 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1146 return true;
1148 case OPT_msse:
1149 if (!value)
1151 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1152 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1154 return true;
1156 case OPT_msse2:
1157 if (!value)
1159 target_flags &= ~MASK_SSE3;
1160 target_flags_explicit |= MASK_SSE3;
1162 return true;
1164 default:
1165 return true;
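/* The pattern above means that turning an ISA extension off also turns off
   everything layered on it: e.g. -mno-sse clears MASK_SSE2 and MASK_SSE3,
   and -mno-mmx clears the 3DNow! masks, while the bits the user set
   explicitly are recorded in target_flags_explicit so that
   override_options () will not silently re-enable them.  */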
1169 /* Sometimes certain combinations of command options do not make
1170 sense on a particular target machine. You can define a macro
1171 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1172 defined, is executed once just after all the command options have
1173 been parsed.
1175 Don't use this macro to turn on various extra optimizations for
1176 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1178 void
1179 override_options (void)
1181 int i;
1182 int ix86_tune_defaulted = 0;
1184 /* Comes from final.c -- no real reason to change it. */
1185 #define MAX_CODE_ALIGN 16
1187 static struct ptt
1189 const struct processor_costs *cost; /* Processor costs */
1190 const int target_enable; /* Target flags to enable. */
1191 const int target_disable; /* Target flags to disable. */
1192 const int align_loop; /* Default alignments. */
1193 const int align_loop_max_skip;
1194 const int align_jump;
1195 const int align_jump_max_skip;
1196 const int align_func;
1198 const processor_target_table[PROCESSOR_max] =
1200 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1201 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1202 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1203 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1204 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1205 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1206 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1207 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1208 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
1211 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1212 static struct pta
1214 const char *const name; /* processor name or nickname. */
1215 const enum processor_type processor;
1216 const enum pta_flags
1218 PTA_SSE = 1,
1219 PTA_SSE2 = 2,
1220 PTA_SSE3 = 4,
1221 PTA_MMX = 8,
1222 PTA_PREFETCH_SSE = 16,
1223 PTA_3DNOW = 32,
1224 PTA_3DNOW_A = 64,
1225 PTA_64BIT = 128
1226 } flags;
1228 const processor_alias_table[] =
1230 {"i386", PROCESSOR_I386, 0},
1231 {"i486", PROCESSOR_I486, 0},
1232 {"i586", PROCESSOR_PENTIUM, 0},
1233 {"pentium", PROCESSOR_PENTIUM, 0},
1234 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1235 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1236 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1237 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1238 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1239 {"i686", PROCESSOR_PENTIUMPRO, 0},
1240 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1241 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1242 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1243 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1244 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1245 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1246 | PTA_MMX | PTA_PREFETCH_SSE},
1247 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1248 | PTA_MMX | PTA_PREFETCH_SSE},
1249 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1250 | PTA_MMX | PTA_PREFETCH_SSE},
1251 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1252 | PTA_MMX | PTA_PREFETCH_SSE},
1253 {"k6", PROCESSOR_K6, PTA_MMX},
1254 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1255 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1256 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1257 | PTA_3DNOW_A},
1258 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1259 | PTA_3DNOW | PTA_3DNOW_A},
1260 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1261 | PTA_3DNOW_A | PTA_SSE},
1262 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1263 | PTA_3DNOW_A | PTA_SSE},
1264 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1265 | PTA_3DNOW_A | PTA_SSE},
1266 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1267 | PTA_SSE | PTA_SSE2 },
1268 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1269 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1270 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1271 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1272 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1273 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1274 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1275 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1278 int const pta_size = ARRAY_SIZE (processor_alias_table);
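/* Example of how the alias table drives the -march loop below: -march=pentium3
   selects PROCESSOR_PENTIUMPRO and, unless the user set them explicitly,
   turns on MASK_MMX and MASK_SSE and records x86_prefetch_sse.  */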
1280 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1281 SUBTARGET_OVERRIDE_OPTIONS;
1282 #endif
1284 /* Set the default values for switches whose default depends on TARGET_64BIT
1285 in case they weren't overwritten by command line options. */
1286 if (TARGET_64BIT)
1288 if (flag_omit_frame_pointer == 2)
1289 flag_omit_frame_pointer = 1;
1290 if (flag_asynchronous_unwind_tables == 2)
1291 flag_asynchronous_unwind_tables = 1;
1292 if (flag_pcc_struct_return == 2)
1293 flag_pcc_struct_return = 0;
1295 else
1297 if (flag_omit_frame_pointer == 2)
1298 flag_omit_frame_pointer = 0;
1299 if (flag_asynchronous_unwind_tables == 2)
1300 flag_asynchronous_unwind_tables = 0;
1301 if (flag_pcc_struct_return == 2)
1302 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1305 if (!ix86_tune_string && ix86_arch_string)
1306 ix86_tune_string = ix86_arch_string;
1307 if (!ix86_tune_string)
1309 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1310 ix86_tune_defaulted = 1;
1312 if (!ix86_arch_string)
1313 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1315 if (ix86_cmodel_string != 0)
1317 if (!strcmp (ix86_cmodel_string, "small"))
1318 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1319 else if (!strcmp (ix86_cmodel_string, "medium"))
1320 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1321 else if (flag_pic)
1322 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1323 else if (!strcmp (ix86_cmodel_string, "32"))
1324 ix86_cmodel = CM_32;
1325 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1326 ix86_cmodel = CM_KERNEL;
1327 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1328 ix86_cmodel = CM_LARGE;
1329 else
1330 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1332 else
1334 ix86_cmodel = CM_32;
1335 if (TARGET_64BIT)
1336 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1338 if (ix86_asm_string != 0)
1340 if (! TARGET_MACHO
1341 && !strcmp (ix86_asm_string, "intel"))
1342 ix86_asm_dialect = ASM_INTEL;
1343 else if (!strcmp (ix86_asm_string, "att"))
1344 ix86_asm_dialect = ASM_ATT;
1345 else
1346 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1348 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1349 error ("code model %qs not supported in the %s bit mode",
1350 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1351 if (ix86_cmodel == CM_LARGE)
1352 sorry ("code model %<large%> not supported yet");
1353 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1354 sorry ("%i-bit mode not compiled in",
1355 (target_flags & MASK_64BIT) ? 64 : 32);
1357 for (i = 0; i < pta_size; i++)
1358 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1360 ix86_arch = processor_alias_table[i].processor;
1361 /* Default cpu tuning to the architecture. */
1362 ix86_tune = ix86_arch;
1363 if (processor_alias_table[i].flags & PTA_MMX
1364 && !(target_flags_explicit & MASK_MMX))
1365 target_flags |= MASK_MMX;
1366 if (processor_alias_table[i].flags & PTA_3DNOW
1367 && !(target_flags_explicit & MASK_3DNOW))
1368 target_flags |= MASK_3DNOW;
1369 if (processor_alias_table[i].flags & PTA_3DNOW_A
1370 && !(target_flags_explicit & MASK_3DNOW_A))
1371 target_flags |= MASK_3DNOW_A;
1372 if (processor_alias_table[i].flags & PTA_SSE
1373 && !(target_flags_explicit & MASK_SSE))
1374 target_flags |= MASK_SSE;
1375 if (processor_alias_table[i].flags & PTA_SSE2
1376 && !(target_flags_explicit & MASK_SSE2))
1377 target_flags |= MASK_SSE2;
1378 if (processor_alias_table[i].flags & PTA_SSE3
1379 && !(target_flags_explicit & MASK_SSE3))
1380 target_flags |= MASK_SSE3;
1381 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1382 x86_prefetch_sse = true;
1383 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1384 error ("CPU you selected does not support x86-64 "
1385 "instruction set");
1386 break;
1389 if (i == pta_size)
1390 error ("bad value (%s) for -march= switch", ix86_arch_string);
1392 for (i = 0; i < pta_size; i++)
1393 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1395 ix86_tune = processor_alias_table[i].processor;
1396 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1398 if (ix86_tune_defaulted)
1400 ix86_tune_string = "x86-64";
1401 for (i = 0; i < pta_size; i++)
1402 if (! strcmp (ix86_tune_string,
1403 processor_alias_table[i].name))
1404 break;
1405 ix86_tune = processor_alias_table[i].processor;
1407 else
1408 error ("CPU you selected does not support x86-64 "
1409 "instruction set");
1411 /* Intel CPUs have always interpreted SSE prefetch instructions as
1412 NOPs; so, we can enable SSE prefetch instructions even when
1413 -mtune (rather than -march) points us to a processor that has them.
1414 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1415 higher processors. */
1416 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1417 x86_prefetch_sse = true;
1418 break;
1420 if (i == pta_size)
1421 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1423 if (optimize_size)
1424 ix86_cost = &size_cost;
1425 else
1426 ix86_cost = processor_target_table[ix86_tune].cost;
1427 target_flags |= processor_target_table[ix86_tune].target_enable;
1428 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1430 /* Arrange to set up i386_stack_locals for all functions. */
1431 init_machine_status = ix86_init_machine_status;
1433 /* Validate -mregparm= value. */
1434 if (ix86_regparm_string)
1436 i = atoi (ix86_regparm_string);
1437 if (i < 0 || i > REGPARM_MAX)
1438 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1439 else
1440 ix86_regparm = i;
1442 else
1443 if (TARGET_64BIT)
1444 ix86_regparm = REGPARM_MAX;
1446 /* If the user has provided any of the -malign-* options,
1447 warn and use that value only if -falign-* is not set.
1448 Remove this code in GCC 3.2 or later. */
1449 if (ix86_align_loops_string)
1451 warning (0, "-malign-loops is obsolete, use -falign-loops");
1452 if (align_loops == 0)
1454 i = atoi (ix86_align_loops_string);
1455 if (i < 0 || i > MAX_CODE_ALIGN)
1456 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1457 else
1458 align_loops = 1 << i;
1462 if (ix86_align_jumps_string)
1464 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1465 if (align_jumps == 0)
1467 i = atoi (ix86_align_jumps_string);
1468 if (i < 0 || i > MAX_CODE_ALIGN)
1469 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1470 else
1471 align_jumps = 1 << i;
1475 if (ix86_align_funcs_string)
1477 warning (0, "-malign-functions is obsolete, use -falign-functions");
1478 if (align_functions == 0)
1480 i = atoi (ix86_align_funcs_string);
1481 if (i < 0 || i > MAX_CODE_ALIGN)
1482 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1483 else
1484 align_functions = 1 << i;
1488 /* Default align_* from the processor table. */
1489 if (align_loops == 0)
1491 align_loops = processor_target_table[ix86_tune].align_loop;
1492 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1494 if (align_jumps == 0)
1496 align_jumps = processor_target_table[ix86_tune].align_jump;
1497 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1499 if (align_functions == 0)
1501 align_functions = processor_target_table[ix86_tune].align_func;
1504 /* Validate -mpreferred-stack-boundary= value, or provide default.
1505 The default of 128 bits is for Pentium III's SSE __m128. We can't
1506 change it because of optimize_size; otherwise, we can't mix object
1507 files compiled with -Os and -On. */
1508 ix86_preferred_stack_boundary = 128;
1509 if (ix86_preferred_stack_boundary_string)
1511 i = atoi (ix86_preferred_stack_boundary_string);
1512 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1513 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1514 TARGET_64BIT ? 4 : 2);
1515 else
1516 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
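/* Added worked example (illustrative): -mpreferred-stack-boundary=4
   yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the
   16-byte alignment needed to spill SSE __m128 values; the 64-bit
   lower bound of 4 matches the x86-64 ABI's mandatory 16-byte stack
   alignment.  */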
1519 /* Validate -mbranch-cost= value, or provide default. */
1520 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1521 if (ix86_branch_cost_string)
1523 i = atoi (ix86_branch_cost_string);
1524 if (i < 0 || i > 5)
1525 error ("-mbranch-cost=%d is not between 0 and 5", i);
1526 else
1527 ix86_branch_cost = i;
1529 if (ix86_section_threshold_string)
1531 i = atoi (ix86_section_threshold_string);
1532 if (i < 0)
1533 error ("-mlarge-data-threshold=%d is negative", i);
1534 else
1535 ix86_section_threshold = i;
1538 if (ix86_tls_dialect_string)
1540 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1541 ix86_tls_dialect = TLS_DIALECT_GNU;
1542 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1543 ix86_tls_dialect = TLS_DIALECT_SUN;
1544 else
1545 error ("bad value (%s) for -mtls-dialect= switch",
1546 ix86_tls_dialect_string);
1549 /* Keep nonleaf frame pointers. */
1550 if (flag_omit_frame_pointer)
1551 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1552 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1553 flag_omit_frame_pointer = 1;
1555 /* If we're doing fast math, we don't care about comparison order
1556 wrt NaNs. This lets us use a shorter comparison sequence. */
1557 if (flag_unsafe_math_optimizations)
1558 target_flags &= ~MASK_IEEE_FP;
1560 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1561 since the insns won't need emulation. */
1562 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1563 target_flags &= ~MASK_NO_FANCY_MATH_387;
1565 /* Likewise, if the target doesn't have a 387, or we've specified
1566 software floating point, don't use 387 inline intrinsics. */
1567 if (!TARGET_80387)
1568 target_flags |= MASK_NO_FANCY_MATH_387;
1570 /* Turn on SSE2 builtins for -msse3. */
1571 if (TARGET_SSE3)
1572 target_flags |= MASK_SSE2;
1574 /* Turn on SSE builtins for -msse2. */
1575 if (TARGET_SSE2)
1576 target_flags |= MASK_SSE;
1578 /* Turn on MMX builtins for -msse. */
1579 if (TARGET_SSE)
1581 target_flags |= MASK_MMX & ~target_flags_explicit;
1582 x86_prefetch_sse = true;
1585 /* Turn on MMX builtins for 3Dnow. */
1586 if (TARGET_3DNOW)
1587 target_flags |= MASK_MMX;
1589 if (TARGET_64BIT)
1591 if (TARGET_ALIGN_DOUBLE)
1592 error ("-malign-double makes no sense in the 64bit mode");
1593 if (TARGET_RTD)
1594 error ("-mrtd calling convention not supported in the 64bit mode");
1596 /* Enable by default the SSE and MMX builtins. Do allow the user to
1597 explicitly disable any of these. In particular, disabling SSE and
1598 MMX for kernel code is extremely useful. */
1599 target_flags
1600 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1601 & ~target_flags_explicit);
1603 else
1605 /* The i386 ABI does not specify a red zone. It still makes sense to use it
1606 when the programmer takes care to keep the stack from being destroyed. */
1607 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1608 target_flags |= MASK_NO_RED_ZONE;
1611 /* Accept -msseregparm only if at least SSE support is enabled. */
1612 if (TARGET_SSEREGPARM
1613 && ! TARGET_SSE)
1614 error ("-msseregparm used without SSE enabled");
1616 ix86_fpmath = TARGET_FPMATH_DEFAULT;
1618 if (ix86_fpmath_string != 0)
1620 if (! strcmp (ix86_fpmath_string, "387"))
1621 ix86_fpmath = FPMATH_387;
1622 else if (! strcmp (ix86_fpmath_string, "sse"))
1624 if (!TARGET_SSE)
1626 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1627 ix86_fpmath = FPMATH_387;
1629 else
1630 ix86_fpmath = FPMATH_SSE;
1632 else if (! strcmp (ix86_fpmath_string, "387,sse")
1633 || ! strcmp (ix86_fpmath_string, "sse,387"))
1635 if (!TARGET_SSE)
1637 warning (0, "SSE instruction set disabled, using 387 arithmetics");
1638 ix86_fpmath = FPMATH_387;
1640 else if (!TARGET_80387)
1642 warning (0, "387 instruction set disabled, using SSE arithmetics");
1643 ix86_fpmath = FPMATH_SSE;
1645 else
1646 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1648 else
1649 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1652 /* If the i387 is disabled, then do not return values in it. */
1653 if (!TARGET_80387)
1654 target_flags &= ~MASK_FLOAT_RETURNS;
1656 if ((x86_accumulate_outgoing_args & TUNEMASK)
1657 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1658 && !optimize_size)
1659 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1661 /* ??? Unwind info is not correct around the CFG unless either a frame
1662 pointer is present or M_A_O_A is set. Fixing this requires rewriting
1663 unwind info generation to be aware of the CFG and propagating states
1664 around edges. */
1665 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1666 || flag_exceptions || flag_non_call_exceptions)
1667 && flag_omit_frame_pointer
1668 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1670 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1671 warning (0, "unwind tables currently require either a frame pointer "
1672 "or -maccumulate-outgoing-args for correctness");
1673 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1676 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1678 char *p;
1679 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1680 p = strchr (internal_label_prefix, 'X');
1681 internal_label_prefix_len = p - internal_label_prefix;
1682 *p = '\0';
1685 /* When a scheduling description is not available, disable the scheduler pass
1686 so it won't slow down the compilation and make x87 code slower. */
1687 if (!TARGET_SCHEDULE)
1688 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1691 /* Switch to the appropriate section for output of DECL.
1692 DECL is either a `VAR_DECL' node or a constant of some sort.
1693 RELOC indicates whether forming the initial value of DECL requires
1694 link-time relocations. */
1696 static void
1697 x86_64_elf_select_section (tree decl, int reloc,
1698 unsigned HOST_WIDE_INT align)
1700 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1701 && ix86_in_large_data_p (decl))
1703 const char *sname = NULL;
1704 unsigned int flags = SECTION_WRITE;
1705 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1707 case SECCAT_DATA:
1708 sname = ".ldata";
1709 break;
1710 case SECCAT_DATA_REL:
1711 sname = ".ldata.rel";
1712 break;
1713 case SECCAT_DATA_REL_LOCAL:
1714 sname = ".ldata.rel.local";
1715 break;
1716 case SECCAT_DATA_REL_RO:
1717 sname = ".ldata.rel.ro";
1718 break;
1719 case SECCAT_DATA_REL_RO_LOCAL:
1720 sname = ".ldata.rel.ro.local";
1721 break;
1722 case SECCAT_BSS:
1723 sname = ".lbss";
1724 flags |= SECTION_BSS;
1725 break;
1726 case SECCAT_RODATA:
1727 case SECCAT_RODATA_MERGE_STR:
1728 case SECCAT_RODATA_MERGE_STR_INIT:
1729 case SECCAT_RODATA_MERGE_CONST:
1730 sname = ".lrodata";
1731 flags = 0;
1732 break;
1733 case SECCAT_SRODATA:
1734 case SECCAT_SDATA:
1735 case SECCAT_SBSS:
1736 gcc_unreachable ();
1737 case SECCAT_TEXT:
1738 case SECCAT_TDATA:
1739 case SECCAT_TBSS:
1740 /* We don't split these for the medium model. Place them into
1741 default sections and hope for the best. */
1742 break;
1744 if (sname)
1746 /* We might get called with string constants, but named_section
1747 doesn't like them as they are not DECLs. Also, we need to set
1748 flags in that case. */
1749 if (!DECL_P (decl))
1750 named_section_flags (sname, flags);
1751 else
1752 named_section (decl, sname, reloc);
1753 return;
1756 default_elf_select_section (decl, reloc, align);
1759 /* Build up a unique section name, expressed as a
1760 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
1761 RELOC indicates whether the initial value of EXP requires
1762 link-time relocations. */
1764 static void
1765 x86_64_elf_unique_section (tree decl, int reloc)
1767 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1768 && ix86_in_large_data_p (decl))
1770 const char *prefix = NULL;
1771 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
1772 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
1774 switch (categorize_decl_for_section (decl, reloc, flag_pic))
1776 case SECCAT_DATA:
1777 case SECCAT_DATA_REL:
1778 case SECCAT_DATA_REL_LOCAL:
1779 case SECCAT_DATA_REL_RO:
1780 case SECCAT_DATA_REL_RO_LOCAL:
1781 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
1782 break;
1783 case SECCAT_BSS:
1784 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
1785 break;
1786 case SECCAT_RODATA:
1787 case SECCAT_RODATA_MERGE_STR:
1788 case SECCAT_RODATA_MERGE_STR_INIT:
1789 case SECCAT_RODATA_MERGE_CONST:
1790 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
1791 break;
1792 case SECCAT_SRODATA:
1793 case SECCAT_SDATA:
1794 case SECCAT_SBSS:
1795 gcc_unreachable ();
1796 case SECCAT_TEXT:
1797 case SECCAT_TDATA:
1798 case SECCAT_TBSS:
1799 /* We don't split these for the medium model. Place them into
1800 default sections and hope for the best. */
1801 break;
1803 if (prefix)
1805 const char *name;
1806 size_t nlen, plen;
1807 char *string;
1808 plen = strlen (prefix);
1810 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1811 name = targetm.strip_name_encoding (name);
1812 nlen = strlen (name);
1814 string = alloca (nlen + plen + 1);
1815 memcpy (string, prefix, plen);
1816 memcpy (string + plen, name, nlen + 1);
1818 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
1819 return;
1822 default_unique_section (decl, reloc);
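/* Added example of the names built above (illustrative): a medium-model
   global such as `int foo;' that lands in the large data area is given
   the section ".ldata.foo", or ".gnu.linkonce.ld.foo" when it is a
   one-only symbol and COMDAT groups are not available.  */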
1825 #ifdef COMMON_ASM_OP
1826 /* This says how to output assembler code to declare an
1827 uninitialized external linkage data object.
1829 For medium model x86-64 we need to use .largecomm opcode for
1830 large objects. */
1831 void
1832 x86_elf_aligned_common (FILE *file,
1833 const char *name, unsigned HOST_WIDE_INT size,
1834 int align)
1836 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1837 && size > (unsigned int)ix86_section_threshold)
1838 fprintf (file, ".largecomm\t");
1839 else
1840 fprintf (file, "%s", COMMON_ASM_OP);
1841 assemble_name (file, name);
1842 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
1843 size, align / BITS_PER_UNIT);
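/* Added example of the emitted assembly (illustrative; the alignment
   operand depends on the object):

     .largecomm  buf,2097152,32

   for a large uninitialized object under -mcmodel=medium, while objects
   below -mlarge-data-threshold still use the plain COMMON_ASM_OP
   (".comm") form.  */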
1846 /* Utility function for targets to use in implementing
1847 ASM_OUTPUT_ALIGNED_BSS. */
1849 void
1850 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
1851 const char *name, unsigned HOST_WIDE_INT size,
1852 int align)
1854 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1855 && size > (unsigned int)ix86_section_threshold)
1856 named_section (decl, ".lbss", 0);
1857 else
1858 bss_section ();
1859 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
1860 #ifdef ASM_DECLARE_OBJECT_NAME
1861 last_assemble_variable_decl = decl;
1862 ASM_DECLARE_OBJECT_NAME (file, name, decl);
1863 #else
1864 /* The standard thing is just to output a label for the object. */
1865 ASM_OUTPUT_LABEL (file, name);
1866 #endif /* ASM_DECLARE_OBJECT_NAME */
1867 ASM_OUTPUT_SKIP (file, size ? size : 1);
1869 #endif
1871 void
1872 optimization_options (int level, int size ATTRIBUTE_UNUSED)
1874 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
1875 make the problem with not enough registers even worse. */
1876 #ifdef INSN_SCHEDULING
1877 if (level > 1)
1878 flag_schedule_insns = 0;
1879 #endif
1881 if (TARGET_MACHO)
1882 /* The Darwin libraries never set errno, so we might as well
1883 avoid calling them when that's the only reason we would. */
1884 flag_errno_math = 0;
1886 /* The default values of these switches depend on TARGET_64BIT,
1887 which is not known at this moment. Mark these values with 2 and
1888 let the user override them. If there is no command line option
1889 specifying them, we will set the defaults in override_options. */
1890 if (optimize >= 1)
1891 flag_omit_frame_pointer = 2;
1892 flag_pcc_struct_return = 2;
1893 flag_asynchronous_unwind_tables = 2;
1894 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1895 SUBTARGET_OPTIMIZATION_OPTIONS;
1896 #endif
1899 /* Table of valid machine attributes. */
1900 const struct attribute_spec ix86_attribute_table[] =
1902 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1903 /* Stdcall attribute says callee is responsible for popping arguments
1904 if they are not variable. */
1905 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1906 /* Fastcall attribute says callee is responsible for popping arguments
1907 if they are not variable. */
1908 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1909 /* Cdecl attribute says the callee is a normal C declaration */
1910 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1911 /* Regparm attribute specifies how many integer arguments are to be
1912 passed in registers. */
1913 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
1914 /* Sseregparm attribute says we are using x86_64 calling conventions
1915 for FP arguments. */
1916 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1917 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1918 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1919 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1920 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
1921 #endif
1922 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1923 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
1924 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1925 SUBTARGET_ATTRIBUTE_TABLE,
1926 #endif
1927 { NULL, 0, 0, false, false, false, NULL }
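/* Added usage sketch (illustrative, not part of this file): how the
   calling-convention attributes in the table above appear in ia32 user
   code, with SSE enabled where sseregparm is used.

     int  __attribute__((regparm(3))) f (int a, int b, int c);
                                      a, b, c in %eax, %edx, %ecx
     int  __attribute__((fastcall))   g (int a, int b);
                                      a in %ecx, b in %edx; callee pops
     int  __attribute__((stdcall))    h (int a);
                                      a on the stack; callee pops 4 bytes
     double __attribute__((sseregparm)) s (double x);
                                      x passed in %xmm0 instead of the stack
*/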
1930 /* Decide whether we can make a sibling call to a function. DECL is the
1931 declaration of the function being targeted by the call and EXP is the
1932 CALL_EXPR representing the call. */
1934 static bool
1935 ix86_function_ok_for_sibcall (tree decl, tree exp)
1937 tree func;
1938 rtx a, b;
1940 /* If we are generating position-independent code, we cannot sibcall
1941 optimize any indirect call, or a direct call to a global function,
1942 as the PLT requires %ebx be live. */
1943 if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1944 return false;
1946 if (decl)
1947 func = decl;
1948 else
1950 func = TREE_TYPE (TREE_OPERAND (exp, 0));
1951 if (POINTER_TYPE_P (func))
1952 func = TREE_TYPE (func);
1955 /* Check that the return value locations are the same. Like
1956 if we are returning floats on the 80387 register stack, we cannot
1957 make a sibcall from a function that doesn't return a float to a
1958 function that does or, conversely, from a function that does return
1959 a float to a function that doesn't; the necessary stack adjustment
1960 would not be executed. This is also the place we notice
1961 differences in the return value ABI. Note that it is ok for one
1962 of the functions to have void return type as long as the return
1963 value of the other is passed in a register. */
1964 a = ix86_function_value (TREE_TYPE (exp), func, false);
1965 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1966 cfun->decl, false);
1967 if (STACK_REG_P (a) || STACK_REG_P (b))
1969 if (!rtx_equal_p (a, b))
1970 return false;
1972 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1974 else if (!rtx_equal_p (a, b))
1975 return false;
1977 /* If this call is indirect, we'll need to be able to use a call-clobbered
1978 register for the address of the target function. Make sure that all
1979 such registers are not used for passing parameters. */
1980 if (!decl && !TARGET_64BIT)
1982 tree type;
1984 /* We're looking at the CALL_EXPR, we need the type of the function. */
1985 type = TREE_OPERAND (exp, 0); /* pointer expression */
1986 type = TREE_TYPE (type); /* pointer type */
1987 type = TREE_TYPE (type); /* function type */
1989 if (ix86_function_regparm (type, NULL) >= 3)
1991 /* ??? Need to count the actual number of registers to be used,
1992 not the possible number of registers. Fix later. */
1993 return false;
1997 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1998 /* Dllimport'd functions are also called indirectly. */
1999 if (decl && DECL_DLLIMPORT_P (decl)
2000 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2001 return false;
2002 #endif
2004 /* If we force-aligned the stack, then sibcalling would unalign the
2005 stack, which may break the called function. */
2006 if (cfun->machine->force_align_arg_pointer)
2007 return false;
2009 /* Otherwise okay. That also includes certain types of indirect calls. */
2010 return true;
2013 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2014 calling convention attributes;
2015 arguments as in struct attribute_spec.handler. */
2017 static tree
2018 ix86_handle_cconv_attribute (tree *node, tree name,
2019 tree args,
2020 int flags ATTRIBUTE_UNUSED,
2021 bool *no_add_attrs)
2023 if (TREE_CODE (*node) != FUNCTION_TYPE
2024 && TREE_CODE (*node) != METHOD_TYPE
2025 && TREE_CODE (*node) != FIELD_DECL
2026 && TREE_CODE (*node) != TYPE_DECL)
2028 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2029 IDENTIFIER_POINTER (name));
2030 *no_add_attrs = true;
2031 return NULL_TREE;
2034 /* Can combine regparm with all attributes but fastcall. */
2035 if (is_attribute_p ("regparm", name))
2037 tree cst;
2039 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2041 error ("fastcall and regparm attributes are not compatible");
2044 cst = TREE_VALUE (args);
2045 if (TREE_CODE (cst) != INTEGER_CST)
2047 warning (OPT_Wattributes,
2048 "%qs attribute requires an integer constant argument",
2049 IDENTIFIER_POINTER (name));
2050 *no_add_attrs = true;
2052 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2054 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2055 IDENTIFIER_POINTER (name), REGPARM_MAX);
2056 *no_add_attrs = true;
2059 return NULL_TREE;
2062 if (TARGET_64BIT)
2064 warning (OPT_Wattributes, "%qs attribute ignored",
2065 IDENTIFIER_POINTER (name));
2066 *no_add_attrs = true;
2067 return NULL_TREE;
2070 /* Fastcall cannot be combined with cdecl, stdcall or regparm; it can be combined with sseregparm. */
2071 if (is_attribute_p ("fastcall", name))
2073 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2075 error ("fastcall and cdecl attributes are not compatible");
2077 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2079 error ("fastcall and stdcall attributes are not compatible");
2081 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2083 error ("fastcall and regparm attributes are not compatible");
2087 /* Stdcall cannot be combined with cdecl or fastcall; it can be
2088 combined with regparm and sseregparm. */
2089 else if (is_attribute_p ("stdcall", name))
2091 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2093 error ("stdcall and cdecl attributes are not compatible");
2095 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2097 error ("stdcall and fastcall attributes are not compatible");
2101 /* Can combine cdecl with regparm and sseregparm. */
2102 else if (is_attribute_p ("cdecl", name))
2104 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2106 error ("stdcall and cdecl attributes are not compatible");
2108 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2110 error ("fastcall and cdecl attributes are not compatible");
2114 /* Can combine sseregparm with all attributes. */
2116 return NULL_TREE;
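/* Added example of the checks above (illustrative): the declaration

     void __attribute__((fastcall, regparm(2))) f (int, int);

   is rejected with "fastcall and regparm attributes are not
   compatible", whereas combining stdcall with sseregparm is accepted,
   since sseregparm combines with any of the other attributes.  */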
2119 /* Return 0 if the attributes for two types are incompatible, 1 if they
2120 are compatible, and 2 if they are nearly compatible (which causes a
2121 warning to be generated). */
2123 static int
2124 ix86_comp_type_attributes (tree type1, tree type2)
2126 /* Check for mismatch of non-default calling convention. */
2127 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2129 if (TREE_CODE (type1) != FUNCTION_TYPE)
2130 return 1;
2132 /* Check for mismatched fastcall/regparm types. */
2133 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2134 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2135 || (ix86_function_regparm (type1, NULL)
2136 != ix86_function_regparm (type2, NULL)))
2137 return 0;
2139 /* Check for mismatched sseregparm types. */
2140 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2141 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2142 return 0;
2144 /* Check for mismatched return types (cdecl vs stdcall). */
2145 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2146 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2147 return 0;
2149 return 1;
2152 /* Return the regparm value for a function with the indicated TYPE and DECL.
2153 DECL may be NULL when calling function indirectly
2154 or considering a libcall. */
2156 static int
2157 ix86_function_regparm (tree type, tree decl)
2159 tree attr;
2160 int regparm = ix86_regparm;
2161 bool user_convention = false;
2163 if (!TARGET_64BIT)
2165 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2166 if (attr)
2168 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2169 user_convention = true;
2172 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2174 regparm = 2;
2175 user_convention = true;
2178 /* Use register calling convention for local functions when possible. */
2179 if (!TARGET_64BIT && !user_convention && decl
2180 && flag_unit_at_a_time && !profile_flag)
2182 struct cgraph_local_info *i = cgraph_local_info (decl);
2183 if (i && i->local)
2185 int local_regparm, globals = 0, regno;
2187 /* Make sure no regparm register is taken by a global register
2188 variable. */
2189 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2190 if (global_regs[local_regparm])
2191 break;
2192 /* We can't use regparm(3) for nested functions as these use
2193 static chain pointer in third argument. */
2194 if (local_regparm == 3
2195 && decl_function_context (decl)
2196 && !DECL_NO_STATIC_CHAIN (decl))
2197 local_regparm = 2;
2198 /* Each global register variable increases register pressure,
2199 so the more global register variables there are, the fewer registers
2200 the regparm optimization can use, unless the user requests it explicitly. */
2201 for (regno = 0; regno < 6; regno++)
2202 if (global_regs[regno])
2203 globals++;
2204 local_regparm
2205 = globals < local_regparm ? local_regparm - globals : 0;
2207 if (local_regparm > regparm)
2208 regparm = local_regparm;
2212 return regparm;
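/* Added example of the heuristic above (illustrative): with optimization
   and -funit-at-a-time in effect (the default at -O2), no profiling and
   no global register variables, a file-static function whose callers
   are all visible, e.g.

     static int add3 (int a, int b, int c) { return a + b + c; }

   is promoted to an effective regparm(3), so its arguments arrive in
   %eax, %edx and %ecx instead of on the stack.  */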
2215 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2216 DFmode (2) arguments in SSE registers for a function with the
2217 indicated TYPE and DECL. DECL may be NULL when calling function
2218 indirectly or considering a libcall. Otherwise return 0. */
2220 static int
2221 ix86_function_sseregparm (tree type, tree decl)
2223 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2224 by the sseregparm attribute. */
2225 if (TARGET_SSEREGPARM
2226 || (type
2227 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2229 if (!TARGET_SSE)
2231 if (decl)
2232 error ("Calling %qD with attribute sseregparm without "
2233 "SSE/SSE2 enabled", decl);
2234 else
2235 error ("Calling %qT with attribute sseregparm without "
2236 "SSE/SSE2 enabled", type);
2237 return 0;
2240 return 2;
2243 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2244 (and DFmode for SSE2) arguments in SSE registers,
2245 even for 32-bit targets. */
2246 if (!TARGET_64BIT && decl
2247 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2249 struct cgraph_local_info *i = cgraph_local_info (decl);
2250 if (i && i->local)
2251 return TARGET_SSE2 ? 2 : 1;
2254 return 0;
2257 /* Return true if EAX is live at the start of the function. Used by
2258 ix86_expand_prologue to determine if we need special help before
2259 calling allocate_stack_worker. */
2261 static bool
2262 ix86_eax_live_at_start_p (void)
2264 /* Cheat. Don't bother working forward from ix86_function_regparm
2265 to the function type to whether an actual argument is located in
2266 eax. Instead just look at cfg info, which is still close enough
2267 to correct at this point. This gives false positives for broken
2268 functions that might use uninitialized data that happens to be
2269 allocated in eax, but who cares? */
2270 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2273 /* Value is the number of bytes of arguments automatically
2274 popped when returning from a subroutine call.
2275 FUNDECL is the declaration node of the function (as a tree),
2276 FUNTYPE is the data type of the function (as a tree),
2277 or for a library call it is an identifier node for the subroutine name.
2278 SIZE is the number of bytes of arguments passed on the stack.
2280 On the 80386, the RTD insn may be used to pop them if the number
2281 of args is fixed, but if the number is variable then the caller
2282 must pop them all. RTD can't be used for library calls now
2283 because the library is compiled with the Unix compiler.
2284 Use of RTD is a selectable option, since it is incompatible with
2285 standard Unix calling sequences. If the option is not selected,
2286 the caller must always pop the args.
2288 The attribute stdcall is equivalent to RTD on a per module basis. */
2291 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2293 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2295 /* Cdecl functions override -mrtd, and never pop the stack. */
2296 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2298 /* Stdcall and fastcall functions will pop the stack if not
2299 variable args. */
2300 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2301 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2302 rtd = 1;
2304 if (rtd
2305 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2306 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2307 == void_type_node)))
2308 return size;
2311 /* Lose any fake structure return argument if it is passed on the stack. */
2312 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2313 && !TARGET_64BIT
2314 && !KEEP_AGGREGATE_RETURN_POINTER)
2316 int nregs = ix86_function_regparm (funtype, fundecl);
2318 if (!nregs)
2319 return GET_MODE_SIZE (Pmode);
2322 return 0;
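/* Added worked example (illustrative): for the 32-bit declaration

     void __attribute__((stdcall)) f (int a, int b);

   there are 8 bytes of fixed stack arguments, so this function returns
   8 and the callee exits with "ret $8"; for a plain cdecl function it
   returns 0 and the caller pops the arguments instead.  */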
2325 /* Argument support functions. */
2327 /* Return true when register may be used to pass function parameters. */
2328 bool
2329 ix86_function_arg_regno_p (int regno)
2331 int i;
2332 if (!TARGET_64BIT)
2333 return (regno < REGPARM_MAX
2334 || (TARGET_MMX && MMX_REGNO_P (regno)
2335 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2336 || (TARGET_SSE && SSE_REGNO_P (regno)
2337 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2339 if (TARGET_SSE && SSE_REGNO_P (regno)
2340 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2341 return true;
2342 /* RAX is used as hidden argument to va_arg functions. */
2343 if (!regno)
2344 return true;
2345 for (i = 0; i < REGPARM_MAX; i++)
2346 if (regno == x86_64_int_parameter_registers[i])
2347 return true;
2348 return false;
2351 /* Return true if we do not know how to pass TYPE solely in registers. */
2353 static bool
2354 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2356 if (must_pass_in_stack_var_size_or_pad (mode, type))
2357 return true;
2359 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2360 The layout_type routine is crafty and tries to trick us into passing
2361 currently unsupported vector types on the stack by using TImode. */
2362 return (!TARGET_64BIT && mode == TImode
2363 && type && TREE_CODE (type) != VECTOR_TYPE);
2366 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2367 for a call to a function whose data type is FNTYPE.
2368 For a library call, FNTYPE is 0. */
2370 void
2371 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2372 tree fntype, /* tree ptr for function decl */
2373 rtx libname, /* SYMBOL_REF of library name or 0 */
2374 tree fndecl)
2376 static CUMULATIVE_ARGS zero_cum;
2377 tree param, next_param;
2379 if (TARGET_DEBUG_ARG)
2381 fprintf (stderr, "\ninit_cumulative_args (");
2382 if (fntype)
2383 fprintf (stderr, "fntype code = %s, ret code = %s",
2384 tree_code_name[(int) TREE_CODE (fntype)],
2385 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2386 else
2387 fprintf (stderr, "no fntype");
2389 if (libname)
2390 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2393 *cum = zero_cum;
2395 /* Set up the number of registers to use for passing arguments. */
2396 cum->nregs = ix86_regparm;
2397 if (TARGET_SSE)
2398 cum->sse_nregs = SSE_REGPARM_MAX;
2399 if (TARGET_MMX)
2400 cum->mmx_nregs = MMX_REGPARM_MAX;
2401 cum->warn_sse = true;
2402 cum->warn_mmx = true;
2403 cum->maybe_vaarg = false;
2405 /* Use ecx and edx registers if function has fastcall attribute,
2406 else look for regparm information. */
2407 if (fntype && !TARGET_64BIT)
2409 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2411 cum->nregs = 2;
2412 cum->fastcall = 1;
2414 else
2415 cum->nregs = ix86_function_regparm (fntype, fndecl);
2418 /* Set up the number of SSE registers used for passing SFmode
2419 and DFmode arguments. Warn for mismatching ABI. */
2420 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2422 /* Determine if this function has variable arguments. This is
2423 indicated by the last argument being 'void_type_node' if there
2424 are no variable arguments. If there are variable arguments, then
2425 we won't pass anything in registers in 32-bit mode. */
2427 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2429 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2430 param != 0; param = next_param)
2432 next_param = TREE_CHAIN (param);
2433 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2435 if (!TARGET_64BIT)
2437 cum->nregs = 0;
2438 cum->sse_nregs = 0;
2439 cum->mmx_nregs = 0;
2440 cum->warn_sse = 0;
2441 cum->warn_mmx = 0;
2442 cum->fastcall = 0;
2443 cum->float_in_sse = 0;
2445 cum->maybe_vaarg = true;
2449 if ((!fntype && !libname)
2450 || (fntype && !TYPE_ARG_TYPES (fntype)))
2451 cum->maybe_vaarg = true;
2453 if (TARGET_DEBUG_ARG)
2454 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2456 return;
2459 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
2460 But in the case of vector types, it is some vector mode.
2462 When we have only some of our vector isa extensions enabled, then there
2463 are some modes for which vector_mode_supported_p is false. For these
2464 modes, the generic vector support in gcc will choose some non-vector mode
2465 in order to implement the type. By computing the natural mode, we'll
2466 select the proper ABI location for the operand and not depend on whatever
2467 the middle-end decides to do with these vector types. */
2469 static enum machine_mode
2470 type_natural_mode (tree type)
2472 enum machine_mode mode = TYPE_MODE (type);
2474 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2476 HOST_WIDE_INT size = int_size_in_bytes (type);
2477 if ((size == 8 || size == 16)
2478 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
2479 && TYPE_VECTOR_SUBPARTS (type) > 1)
2481 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2483 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2484 mode = MIN_MODE_VECTOR_FLOAT;
2485 else
2486 mode = MIN_MODE_VECTOR_INT;
2488 /* Get the mode which has this inner mode and number of units. */
2489 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2490 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2491 && GET_MODE_INNER (mode) == innermode)
2492 return mode;
2494 gcc_unreachable ();
2498 return mode;
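/* Added example (illustrative): for

     typedef float v4sf __attribute__((vector_size (16)));

   this returns V4SFmode even when SSE is disabled and the generic
   vector support would otherwise implement the type in a non-vector
   mode, so the argument is still assigned its proper ABI location.  */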
2501 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2502 this may not agree with the mode that the type system has chosen for the
2503 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2504 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
2506 static rtx
2507 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2508 unsigned int regno)
2510 rtx tmp;
2512 if (orig_mode != BLKmode)
2513 tmp = gen_rtx_REG (orig_mode, regno);
2514 else
2516 tmp = gen_rtx_REG (mode, regno);
2517 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2518 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2521 return tmp;
2524 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
2525 of this code is to classify each 8bytes of incoming argument by the register
2526 class and assign registers accordingly. */
2528 /* Return the union class of CLASS1 and CLASS2.
2529 See the x86-64 PS ABI for details. */
2531 static enum x86_64_reg_class
2532 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2534 /* Rule #1: If both classes are equal, this is the resulting class. */
2535 if (class1 == class2)
2536 return class1;
2538 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2539 the other class. */
2540 if (class1 == X86_64_NO_CLASS)
2541 return class2;
2542 if (class2 == X86_64_NO_CLASS)
2543 return class1;
2545 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2546 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2547 return X86_64_MEMORY_CLASS;
2549 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2550 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2551 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2552 return X86_64_INTEGERSI_CLASS;
2553 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2554 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2555 return X86_64_INTEGER_CLASS;
2557 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2558 MEMORY is used. */
2559 if (class1 == X86_64_X87_CLASS
2560 || class1 == X86_64_X87UP_CLASS
2561 || class1 == X86_64_COMPLEX_X87_CLASS
2562 || class2 == X86_64_X87_CLASS
2563 || class2 == X86_64_X87UP_CLASS
2564 || class2 == X86_64_COMPLEX_X87_CLASS)
2565 return X86_64_MEMORY_CLASS;
2567 /* Rule #6: Otherwise class SSE is used. */
2568 return X86_64_SSE_CLASS;
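/* Added worked example (illustrative): for `union { int i; float f; }'
   the single eightbyte is classified X86_64_INTEGERSI_CLASS for the int
   member and X86_64_SSESF_CLASS for the float member; rule #4 merges
   these to X86_64_INTEGERSI_CLASS, so the union is passed in a
   general-purpose register.  */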
2571 /* Classify the argument of type TYPE and mode MODE.
2572 CLASSES will be filled by the register class used to pass each word
2573 of the operand. The number of words is returned. In case the parameter
2574 should be passed in memory, 0 is returned. As a special case for zero
2575 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2577 BIT_OFFSET is used internally for handling records; it specifies the
2578 offset in bits modulo 256 to avoid overflow cases.
2580 See the x86-64 PS ABI for details.
2583 static int
2584 classify_argument (enum machine_mode mode, tree type,
2585 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2587 HOST_WIDE_INT bytes =
2588 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2589 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2591 /* Variable sized entities are always passed/returned in memory. */
2592 if (bytes < 0)
2593 return 0;
2595 if (mode != VOIDmode
2596 && targetm.calls.must_pass_in_stack (mode, type))
2597 return 0;
2599 if (type && AGGREGATE_TYPE_P (type))
2601 int i;
2602 tree field;
2603 enum x86_64_reg_class subclasses[MAX_CLASSES];
2605 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
2606 if (bytes > 16)
2607 return 0;
2609 for (i = 0; i < words; i++)
2610 classes[i] = X86_64_NO_CLASS;
2612 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2613 signal the memory class, so handle this as a special case. */
2614 if (!words)
2616 classes[0] = X86_64_NO_CLASS;
2617 return 1;
2620 /* Classify each field of record and merge classes. */
2621 switch (TREE_CODE (type))
2623 case RECORD_TYPE:
2624 /* For classes first merge in the fields of the subclasses. */
2625 if (TYPE_BINFO (type))
2627 tree binfo, base_binfo;
2628 int basenum;
2630 for (binfo = TYPE_BINFO (type), basenum = 0;
2631 BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2633 int num;
2634 int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2635 tree type = BINFO_TYPE (base_binfo);
2637 num = classify_argument (TYPE_MODE (type),
2638 type, subclasses,
2639 (offset + bit_offset) % 256);
2640 if (!num)
2641 return 0;
2642 for (i = 0; i < num; i++)
2644 int pos = (offset + (bit_offset % 64)) / 8 / 8;
2645 classes[i + pos] =
2646 merge_classes (subclasses[i], classes[i + pos]);
2650 /* And now merge the fields of structure. */
2651 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2653 if (TREE_CODE (field) == FIELD_DECL)
2655 int num;
2657 if (TREE_TYPE (field) == error_mark_node)
2658 continue;
2660 /* Bitfields are always classified as integer. Handle them
2661 early, since later code would consider them to be
2662 misaligned integers. */
2663 if (DECL_BIT_FIELD (field))
2665 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2666 i < ((int_bit_position (field) + (bit_offset % 64))
2667 + tree_low_cst (DECL_SIZE (field), 0)
2668 + 63) / 8 / 8; i++)
2669 classes[i] =
2670 merge_classes (X86_64_INTEGER_CLASS,
2671 classes[i]);
2673 else
2675 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2676 TREE_TYPE (field), subclasses,
2677 (int_bit_position (field)
2678 + bit_offset) % 256);
2679 if (!num)
2680 return 0;
2681 for (i = 0; i < num; i++)
2683 int pos =
2684 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2685 classes[i + pos] =
2686 merge_classes (subclasses[i], classes[i + pos]);
2691 break;
2693 case ARRAY_TYPE:
2694 /* Arrays are handled as small records. */
2696 int num;
2697 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2698 TREE_TYPE (type), subclasses, bit_offset);
2699 if (!num)
2700 return 0;
2702 /* The partial classes are now full classes. */
2703 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2704 subclasses[0] = X86_64_SSE_CLASS;
2705 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2706 subclasses[0] = X86_64_INTEGER_CLASS;
2708 for (i = 0; i < words; i++)
2709 classes[i] = subclasses[i % num];
2711 break;
2713 case UNION_TYPE:
2714 case QUAL_UNION_TYPE:
2715 /* Unions are similar to RECORD_TYPE but offset is always 0.
2718 /* Unions are not derived. */
2719 gcc_assert (!TYPE_BINFO (type)
2720 || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2721 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2723 if (TREE_CODE (field) == FIELD_DECL)
2725 int num;
2727 if (TREE_TYPE (field) == error_mark_node)
2728 continue;
2730 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2731 TREE_TYPE (field), subclasses,
2732 bit_offset);
2733 if (!num)
2734 return 0;
2735 for (i = 0; i < num; i++)
2736 classes[i] = merge_classes (subclasses[i], classes[i]);
2739 break;
2741 default:
2742 gcc_unreachable ();
2745 /* Final merger cleanup. */
2746 for (i = 0; i < words; i++)
2748 /* If one class is MEMORY, everything should be passed in
2749 memory. */
2750 if (classes[i] == X86_64_MEMORY_CLASS)
2751 return 0;
2753 /* The X86_64_SSEUP_CLASS should be always preceded by
2754 X86_64_SSE_CLASS. */
2755 if (classes[i] == X86_64_SSEUP_CLASS
2756 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2757 classes[i] = X86_64_SSE_CLASS;
2759 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
2760 if (classes[i] == X86_64_X87UP_CLASS
2761 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2762 classes[i] = X86_64_SSE_CLASS;
2764 return words;
2767 /* Compute alignment needed. We align all types to natural boundaries with
2768 exception of XFmode that is aligned to 64bits. */
2769 if (mode != VOIDmode && mode != BLKmode)
2771 int mode_alignment = GET_MODE_BITSIZE (mode);
2773 if (mode == XFmode)
2774 mode_alignment = 128;
2775 else if (mode == XCmode)
2776 mode_alignment = 256;
2777 if (COMPLEX_MODE_P (mode))
2778 mode_alignment /= 2;
2779 /* Misaligned fields are always returned in memory. */
2780 if (bit_offset % mode_alignment)
2781 return 0;
2784 /* for V1xx modes, just use the base mode */
2785 if (VECTOR_MODE_P (mode)
2786 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2787 mode = GET_MODE_INNER (mode);
2789 /* Classification of atomic types. */
2790 switch (mode)
2792 case DImode:
2793 case SImode:
2794 case HImode:
2795 case QImode:
2796 case CSImode:
2797 case CHImode:
2798 case CQImode:
2799 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2800 classes[0] = X86_64_INTEGERSI_CLASS;
2801 else
2802 classes[0] = X86_64_INTEGER_CLASS;
2803 return 1;
2804 case CDImode:
2805 case TImode:
2806 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2807 return 2;
2808 case CTImode:
2809 return 0;
2810 case SFmode:
2811 if (!(bit_offset % 64))
2812 classes[0] = X86_64_SSESF_CLASS;
2813 else
2814 classes[0] = X86_64_SSE_CLASS;
2815 return 1;
2816 case DFmode:
2817 classes[0] = X86_64_SSEDF_CLASS;
2818 return 1;
2819 case XFmode:
2820 classes[0] = X86_64_X87_CLASS;
2821 classes[1] = X86_64_X87UP_CLASS;
2822 return 2;
2823 case TFmode:
2824 classes[0] = X86_64_SSE_CLASS;
2825 classes[1] = X86_64_SSEUP_CLASS;
2826 return 2;
2827 case SCmode:
2828 classes[0] = X86_64_SSE_CLASS;
2829 return 1;
2830 case DCmode:
2831 classes[0] = X86_64_SSEDF_CLASS;
2832 classes[1] = X86_64_SSEDF_CLASS;
2833 return 2;
2834 case XCmode:
2835 classes[0] = X86_64_COMPLEX_X87_CLASS;
2836 return 1;
2837 case TCmode:
2838 /* This mode is larger than 16 bytes. */
2839 return 0;
2840 case V4SFmode:
2841 case V4SImode:
2842 case V16QImode:
2843 case V8HImode:
2844 case V2DFmode:
2845 case V2DImode:
2846 classes[0] = X86_64_SSE_CLASS;
2847 classes[1] = X86_64_SSEUP_CLASS;
2848 return 2;
2849 case V2SFmode:
2850 case V2SImode:
2851 case V4HImode:
2852 case V8QImode:
2853 classes[0] = X86_64_SSE_CLASS;
2854 return 1;
2855 case BLKmode:
2856 case VOIDmode:
2857 return 0;
2858 default:
2859 gcc_assert (VECTOR_MODE_P (mode));
2861 if (bytes > 16)
2862 return 0;
2864 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2866 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2867 classes[0] = X86_64_INTEGERSI_CLASS;
2868 else
2869 classes[0] = X86_64_INTEGER_CLASS;
2870 classes[1] = X86_64_INTEGER_CLASS;
2871 return 1 + (bytes > 8);
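/* Added worked example (illustrative): for

     struct s { double d; int i; };

   classify_argument fills classes[0] = X86_64_SSEDF_CLASS (the double)
   and classes[1] = X86_64_INTEGER_CLASS (the int) and returns 2, so the
   struct is passed in one SSE register plus one general-purpose
   register rather than in memory.  */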
2875 /* Examine the argument and return the number of registers required in each
2876 class. Return 0 iff the parameter should be passed in memory. */
2877 static int
2878 examine_argument (enum machine_mode mode, tree type, int in_return,
2879 int *int_nregs, int *sse_nregs)
2881 enum x86_64_reg_class class[MAX_CLASSES];
2882 int n = classify_argument (mode, type, class, 0);
2884 *int_nregs = 0;
2885 *sse_nregs = 0;
2886 if (!n)
2887 return 0;
2888 for (n--; n >= 0; n--)
2889 switch (class[n])
2891 case X86_64_INTEGER_CLASS:
2892 case X86_64_INTEGERSI_CLASS:
2893 (*int_nregs)++;
2894 break;
2895 case X86_64_SSE_CLASS:
2896 case X86_64_SSESF_CLASS:
2897 case X86_64_SSEDF_CLASS:
2898 (*sse_nregs)++;
2899 break;
2900 case X86_64_NO_CLASS:
2901 case X86_64_SSEUP_CLASS:
2902 break;
2903 case X86_64_X87_CLASS:
2904 case X86_64_X87UP_CLASS:
2905 if (!in_return)
2906 return 0;
2907 break;
2908 case X86_64_COMPLEX_X87_CLASS:
2909 return in_return ? 2 : 0;
2910 case X86_64_MEMORY_CLASS:
2911 gcc_unreachable ();
2913 return 1;
2916 /* Construct container for the argument used by GCC interface. See
2917 FUNCTION_ARG for the detailed description. */
2919 static rtx
2920 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2921 tree type, int in_return, int nintregs, int nsseregs,
2922 const int *intreg, int sse_regno)
2924 /* The following variables hold the static issued_error state. */
2925 static bool issued_sse_arg_error;
2926 static bool issued_sse_ret_error;
2927 static bool issued_x87_ret_error;
2929 enum machine_mode tmpmode;
2930 int bytes =
2931 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2932 enum x86_64_reg_class class[MAX_CLASSES];
2933 int n;
2934 int i;
2935 int nexps = 0;
2936 int needed_sseregs, needed_intregs;
2937 rtx exp[MAX_CLASSES];
2938 rtx ret;
2940 n = classify_argument (mode, type, class, 0);
2941 if (TARGET_DEBUG_ARG)
2943 if (!n)
2944 fprintf (stderr, "Memory class\n");
2945 else
2947 fprintf (stderr, "Classes:");
2948 for (i = 0; i < n; i++)
2950 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2952 fprintf (stderr, "\n");
2955 if (!n)
2956 return NULL;
2957 if (!examine_argument (mode, type, in_return, &needed_intregs,
2958 &needed_sseregs))
2959 return NULL;
2960 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2961 return NULL;
2963 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2964 some less clueful developer tries to use floating-point anyway. */
2965 if (needed_sseregs && !TARGET_SSE)
2967 if (in_return)
2969 if (!issued_sse_ret_error)
2971 error ("SSE register return with SSE disabled");
2972 issued_sse_ret_error = true;
2975 else if (!issued_sse_arg_error)
2977 error ("SSE register argument with SSE disabled");
2978 issued_sse_arg_error = true;
2980 return NULL;
2983 /* Likewise, error if the ABI requires us to return values in the
2984 x87 registers and the user specified -mno-80387. */
2985 if (!TARGET_80387 && in_return)
2986 for (i = 0; i < n; i++)
2987 if (class[i] == X86_64_X87_CLASS
2988 || class[i] == X86_64_X87UP_CLASS
2989 || class[i] == X86_64_COMPLEX_X87_CLASS)
2991 if (!issued_x87_ret_error)
2993 error ("x87 register return with x87 disabled");
2994 issued_x87_ret_error = true;
2996 return NULL;
2999 /* First construct simple cases. Avoid SCmode, since we want to use
3000 single register to pass this type. */
3001 if (n == 1 && mode != SCmode)
3002 switch (class[0])
3004 case X86_64_INTEGER_CLASS:
3005 case X86_64_INTEGERSI_CLASS:
3006 return gen_rtx_REG (mode, intreg[0]);
3007 case X86_64_SSE_CLASS:
3008 case X86_64_SSESF_CLASS:
3009 case X86_64_SSEDF_CLASS:
3010 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3011 case X86_64_X87_CLASS:
3012 case X86_64_COMPLEX_X87_CLASS:
3013 return gen_rtx_REG (mode, FIRST_STACK_REG);
3014 case X86_64_NO_CLASS:
3015 /* Zero sized array, struct or class. */
3016 return NULL;
3017 default:
3018 gcc_unreachable ();
3020 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3021 && mode != BLKmode)
3022 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3023 if (n == 2
3024 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3025 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3026 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3027 && class[1] == X86_64_INTEGER_CLASS
3028 && (mode == CDImode || mode == TImode || mode == TFmode)
3029 && intreg[0] + 1 == intreg[1])
3030 return gen_rtx_REG (mode, intreg[0]);
3032 /* Otherwise figure out the entries of the PARALLEL. */
3033 for (i = 0; i < n; i++)
3035 switch (class[i])
3037 case X86_64_NO_CLASS:
3038 break;
3039 case X86_64_INTEGER_CLASS:
3040 case X86_64_INTEGERSI_CLASS:
3041 /* Merge TImodes on aligned occasions here too. */
3042 if (i * 8 + 8 > bytes)
3043 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3044 else if (class[i] == X86_64_INTEGERSI_CLASS)
3045 tmpmode = SImode;
3046 else
3047 tmpmode = DImode;
3048 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3049 if (tmpmode == BLKmode)
3050 tmpmode = DImode;
3051 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3052 gen_rtx_REG (tmpmode, *intreg),
3053 GEN_INT (i*8));
3054 intreg++;
3055 break;
3056 case X86_64_SSESF_CLASS:
3057 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3058 gen_rtx_REG (SFmode,
3059 SSE_REGNO (sse_regno)),
3060 GEN_INT (i*8));
3061 sse_regno++;
3062 break;
3063 case X86_64_SSEDF_CLASS:
3064 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3065 gen_rtx_REG (DFmode,
3066 SSE_REGNO (sse_regno)),
3067 GEN_INT (i*8));
3068 sse_regno++;
3069 break;
3070 case X86_64_SSE_CLASS:
3071 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3072 tmpmode = TImode;
3073 else
3074 tmpmode = DImode;
3075 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3076 gen_rtx_REG (tmpmode,
3077 SSE_REGNO (sse_regno)),
3078 GEN_INT (i*8));
3079 if (tmpmode == TImode)
3080 i++;
3081 sse_regno++;
3082 break;
3083 default:
3084 gcc_unreachable ();
3088 /* Empty aligned struct, union or class. */
3089 if (nexps == 0)
3090 return NULL;
3092 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3093 for (i = 0; i < nexps; i++)
3094 XVECEXP (ret, 0, i) = exp [i];
3095 return ret;
3098 /* Update the data in CUM to advance over an argument
3099 of mode MODE and data type TYPE.
3100 (TYPE is null for libcalls where that information may not be available.) */
3102 void
3103 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3104 tree type, int named)
3106 int bytes =
3107 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3108 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3110 if (type)
3111 mode = type_natural_mode (type);
3113 if (TARGET_DEBUG_ARG)
3114 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3115 "mode=%s, named=%d)\n\n",
3116 words, cum->words, cum->nregs, cum->sse_nregs,
3117 GET_MODE_NAME (mode), named);
3119 if (TARGET_64BIT)
3121 int int_nregs, sse_nregs;
3122 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3123 cum->words += words;
3124 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3126 cum->nregs -= int_nregs;
3127 cum->sse_nregs -= sse_nregs;
3128 cum->regno += int_nregs;
3129 cum->sse_regno += sse_nregs;
3131 else
3132 cum->words += words;
3134 else
3136 switch (mode)
3138 default:
3139 break;
3141 case BLKmode:
3142 if (bytes < 0)
3143 break;
3144 /* FALLTHRU */
3146 case DImode:
3147 case SImode:
3148 case HImode:
3149 case QImode:
3150 cum->words += words;
3151 cum->nregs -= words;
3152 cum->regno += words;
3154 if (cum->nregs <= 0)
3156 cum->nregs = 0;
3157 cum->regno = 0;
3159 break;
3161 case DFmode:
3162 if (cum->float_in_sse < 2)
3163 break;
3164 case SFmode:
3165 if (cum->float_in_sse < 1)
3166 break;
3167 /* FALLTHRU */
3169 case TImode:
3170 case V16QImode:
3171 case V8HImode:
3172 case V4SImode:
3173 case V2DImode:
3174 case V4SFmode:
3175 case V2DFmode:
3176 if (!type || !AGGREGATE_TYPE_P (type))
3178 cum->sse_words += words;
3179 cum->sse_nregs -= 1;
3180 cum->sse_regno += 1;
3181 if (cum->sse_nregs <= 0)
3183 cum->sse_nregs = 0;
3184 cum->sse_regno = 0;
3187 break;
3189 case V8QImode:
3190 case V4HImode:
3191 case V2SImode:
3192 case V2SFmode:
3193 if (!type || !AGGREGATE_TYPE_P (type))
3195 cum->mmx_words += words;
3196 cum->mmx_nregs -= 1;
3197 cum->mmx_regno += 1;
3198 if (cum->mmx_nregs <= 0)
3200 cum->mmx_nregs = 0;
3201 cum->mmx_regno = 0;
3204 break;
3209 /* Define where to put the arguments to a function.
3210 Value is zero to push the argument on the stack,
3211 or a hard register in which to store the argument.
3213 MODE is the argument's machine mode.
3214 TYPE is the data type of the argument (as a tree).
3215 This is null for libcalls where that information may
3216 not be available.
3217 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3218 the preceding args and about the function being called.
3219 NAMED is nonzero if this argument is a named parameter
3220 (otherwise it is an extra parameter matching an ellipsis). */
3223 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3224 tree type, int named)
3226 enum machine_mode mode = orig_mode;
3227 rtx ret = NULL_RTX;
3228 int bytes =
3229 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3230 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3231 static bool warnedsse, warnedmmx;
3233 /* To simplify the code below, represent vector types with a vector mode
3234 even if MMX/SSE are not active. */
3235 if (type && TREE_CODE (type) == VECTOR_TYPE)
3236 mode = type_natural_mode (type);
3238 /* Handle a hidden AL argument containing number of registers for varargs
3239 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3240 any AL settings. */
3241 if (mode == VOIDmode)
3243 if (TARGET_64BIT)
3244 return GEN_INT (cum->maybe_vaarg
3245 ? (cum->sse_nregs < 0
3246 ? SSE_REGPARM_MAX
3247 : cum->sse_regno)
3248 : -1);
3249 else
3250 return constm1_rtx;
3252 if (TARGET_64BIT)
3253 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3254 cum->sse_nregs,
3255 &x86_64_int_parameter_registers [cum->regno],
3256 cum->sse_regno);
3257 else
3258 switch (mode)
3260 /* For now, pass fp/complex values on the stack. */
3261 default:
3262 break;
3264 case BLKmode:
3265 if (bytes < 0)
3266 break;
3267 /* FALLTHRU */
3268 case DImode:
3269 case SImode:
3270 case HImode:
3271 case QImode:
3272 if (words <= cum->nregs)
3274 int regno = cum->regno;
3276 /* Fastcall allocates the first two DWORD (SImode) or
3277 smaller arguments to ECX and EDX. */
3278 if (cum->fastcall)
3280 if (mode == BLKmode || mode == DImode)
3281 break;
3283 /* ECX not EAX is the first allocated register. */
3284 if (regno == 0)
3285 regno = 2;
3287 ret = gen_rtx_REG (mode, regno);
3289 break;
3290 case DFmode:
3291 if (cum->float_in_sse < 2)
3292 break;
3293 case SFmode:
3294 if (cum->float_in_sse < 1)
3295 break;
3296 /* FALLTHRU */
3297 case TImode:
3298 case V16QImode:
3299 case V8HImode:
3300 case V4SImode:
3301 case V2DImode:
3302 case V4SFmode:
3303 case V2DFmode:
3304 if (!type || !AGGREGATE_TYPE_P (type))
3306 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3308 warnedsse = true;
3309 warning (0, "SSE vector argument without SSE enabled "
3310 "changes the ABI");
3312 if (cum->sse_nregs)
3313 ret = gen_reg_or_parallel (mode, orig_mode,
3314 cum->sse_regno + FIRST_SSE_REG);
3316 break;
3317 case V8QImode:
3318 case V4HImode:
3319 case V2SImode:
3320 case V2SFmode:
3321 if (!type || !AGGREGATE_TYPE_P (type))
3323 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3325 warnedmmx = true;
3326 warning (0, "MMX vector argument without MMX enabled "
3327 "changes the ABI");
3329 if (cum->mmx_nregs)
3330 ret = gen_reg_or_parallel (mode, orig_mode,
3331 cum->mmx_regno + FIRST_MMX_REG);
3333 break;
3336 if (TARGET_DEBUG_ARG)
3338 fprintf (stderr,
3339 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3340 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3342 if (ret)
3343 print_simple_rtl (stderr, ret);
3344 else
3345 fprintf (stderr, ", stack");
3347 fprintf (stderr, " )\n");
3350 return ret;
3353 /* A C expression that indicates when an argument must be passed by
3354 reference. If nonzero for an argument, a copy of that argument is
3355 made in memory and a pointer to the argument is passed instead of
3356 the argument itself. The pointer is passed in whatever way is
3357 appropriate for passing a pointer to that type. */
3359 static bool
3360 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3361 enum machine_mode mode ATTRIBUTE_UNUSED,
3362 tree type, bool named ATTRIBUTE_UNUSED)
3364 if (!TARGET_64BIT)
3365 return 0;
3367 if (type && int_size_in_bytes (type) == -1)
3369 if (TARGET_DEBUG_ARG)
3370 fprintf (stderr, "function_arg_pass_by_reference\n");
3371 return 1;
3374 return 0;
3377 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3378 ABI. Only called if TARGET_SSE. */
3379 static bool
3380 contains_128bit_aligned_vector_p (tree type)
3382 enum machine_mode mode = TYPE_MODE (type);
3383 if (SSE_REG_MODE_P (mode)
3384 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3385 return true;
3386 if (TYPE_ALIGN (type) < 128)
3387 return false;
3389 if (AGGREGATE_TYPE_P (type))
3391 /* Walk the aggregates recursively. */
3392 switch (TREE_CODE (type))
3394 case RECORD_TYPE:
3395 case UNION_TYPE:
3396 case QUAL_UNION_TYPE:
3398 tree field;
3400 if (TYPE_BINFO (type))
3402 tree binfo, base_binfo;
3403 int i;
3405 for (binfo = TYPE_BINFO (type), i = 0;
3406 BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3407 if (contains_128bit_aligned_vector_p
3408 (BINFO_TYPE (base_binfo)))
3409 return true;
3411 /* And now merge the fields of structure. */
3412 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3414 if (TREE_CODE (field) == FIELD_DECL
3415 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3416 return true;
3418 break;
3421 case ARRAY_TYPE:
3422 /* Just in case some languages pass arrays by value. */
3423 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3424 return true;
3425 break;
3427 default:
3428 gcc_unreachable ();
3431 return false;
3434 /* Gives the alignment boundary, in bits, of an argument with the
3435 specified mode and type. */
3438 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3440 int align;
3441 if (type)
3442 align = TYPE_ALIGN (type);
3443 else
3444 align = GET_MODE_ALIGNMENT (mode);
3445 if (align < PARM_BOUNDARY)
3446 align = PARM_BOUNDARY;
3447 if (!TARGET_64BIT)
3449 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3450 make an exception for SSE modes since these require 128bit
3451 alignment.
3453 The handling here differs from field_alignment. ICC aligns MMX
3454 arguments to 4 byte boundaries, while structure fields are aligned
3455 to 8 byte boundaries. */
3456 if (!TARGET_SSE)
3457 align = PARM_BOUNDARY;
3458 else if (!type)
3460 if (!SSE_REG_MODE_P (mode))
3461 align = PARM_BOUNDARY;
3463 else
3465 if (!contains_128bit_aligned_vector_p (type))
3466 align = PARM_BOUNDARY;
3469 if (align > 128)
3470 align = 128;
3471 return align;
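/* A few concrete results of the function above on a 32-bit target with
   SSE enabled (a sketch derived from the code, not a full ABI table):
   plain scalars such as int or double get PARM_BOUNDARY (32 bits);
   __m128-style vector types (V4SFmode and friends) get 128 bits; an
   aggregate gets 128 bits only when contains_128bit_aligned_vector_p
   finds a 128-bit vector inside it.  The result is capped at 128 bits,
   and without SSE everything falls back to PARM_BOUNDARY.  */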
3474 /* Return true if N is a possible register number of function value. */
3475 bool
3476 ix86_function_value_regno_p (int regno)
3478 if (regno == 0
3479 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3480 || (regno == FIRST_SSE_REG && TARGET_SSE))
3481 return true;
3483 if (!TARGET_64BIT
3484 && (regno == FIRST_MMX_REG && TARGET_MMX))
3485 return true;
3487 return false;
3490 /* Define how to find the value returned by a function.
3491 VALTYPE is the data type of the value (as a tree).
3492 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3493 otherwise, FUNC is 0. */
3495 ix86_function_value (tree valtype, tree fntype_or_decl,
3496 bool outgoing ATTRIBUTE_UNUSED)
3498 enum machine_mode natmode = type_natural_mode (valtype);
3500 if (TARGET_64BIT)
3502 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3503 1, REGPARM_MAX, SSE_REGPARM_MAX,
3504 x86_64_int_return_registers, 0);
3505 /* For zero sized structures, construct_container returns NULL, but we
3506 need to keep the rest of the compiler happy by returning a meaningful value. */
3507 if (!ret)
3508 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3509 return ret;
3511 else
3513 tree fn = NULL_TREE, fntype;
3514 if (fntype_or_decl
3515 && DECL_P (fntype_or_decl))
3516 fn = fntype_or_decl;
3517 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3518 return gen_rtx_REG (TYPE_MODE (valtype),
3519 ix86_value_regno (natmode, fn, fntype));
3523 /* Return nonzero iff TYPE is returned in memory. */
3525 ix86_return_in_memory (tree type)
3527 int needed_intregs, needed_sseregs, size;
3528 enum machine_mode mode = type_natural_mode (type);
3530 if (TARGET_64BIT)
3531 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3533 if (mode == BLKmode)
3534 return 1;
3536 size = int_size_in_bytes (type);
3538 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3539 return 0;
3541 if (VECTOR_MODE_P (mode) || mode == TImode)
3543 /* User-created vectors small enough to fit in EAX. */
3544 if (size < 8)
3545 return 0;
3547 /* MMX/3dNow values are returned in MM0,
3548 except when it doesn't exist. */
3549 if (size == 8)
3550 return (TARGET_MMX ? 0 : 1);
3552 /* SSE values are returned in XMM0, except when it doesn't exist. */
3553 if (size == 16)
3554 return (TARGET_SSE ? 0 : 1);
3557 if (mode == XFmode)
3558 return 0;
3560 if (size > 12)
3561 return 1;
3562 return 0;
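/* Summary of the 32-bit cases above (a sketch, not a complete ABI
   statement): aggregates of at most 8 bytes are returned in registers
   when MS_AGGREGATE_RETURN is in effect; 8-byte vectors come back in
   %mm0 only when MMX is available and 16-byte vectors in %xmm0 only when
   SSE is available, otherwise they go to memory; XFmode always comes
   back in %st(0); anything else larger than 12 bytes is returned in
   memory.  */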
3565 /* When returning SSE vector types, we have a choice of either
3566 (1) being abi incompatible with a -march switch, or
3567 (2) generating an error.
3568 Given no good solution, I think the safest thing is one warning.
3569 The user won't be able to use -Werror, but....
3571 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3572 called in response to actually generating a caller or callee that
3573 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
3574 via aggregate_value_p for general type probing from tree-ssa. */
3576 static rtx
3577 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3579 static bool warnedsse, warnedmmx;
3581 if (type)
3583 /* Look at the return type of the function, not the function type. */
3584 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3586 if (!TARGET_SSE && !warnedsse)
3588 if (mode == TImode
3589 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3591 warnedsse = true;
3592 warning (0, "SSE vector return without SSE enabled "
3593 "changes the ABI");
3597 if (!TARGET_MMX && !warnedmmx)
3599 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3601 warnedmmx = true;
3602 warning (0, "MMX vector return without MMX enabled "
3603 "changes the ABI");
3608 return NULL;
3611 /* Define how to find the value returned by a library function
3612 assuming the value has mode MODE. */
3614 ix86_libcall_value (enum machine_mode mode)
3616 if (TARGET_64BIT)
3618 switch (mode)
3620 case SFmode:
3621 case SCmode:
3622 case DFmode:
3623 case DCmode:
3624 case TFmode:
3625 return gen_rtx_REG (mode, FIRST_SSE_REG);
3626 case XFmode:
3627 case XCmode:
3628 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3629 case TCmode:
3630 return NULL;
3631 default:
3632 return gen_rtx_REG (mode, 0);
3635 else
3636 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3639 /* Given a mode, return the register to use for a return value. */
3641 static int
3642 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3644 gcc_assert (!TARGET_64BIT);
3646 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3647 we prevent this case when mmx is not available. */
3648 if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3649 return FIRST_MMX_REG;
3651 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3652 we prevent this case when sse is not available. */
3653 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3654 return FIRST_SSE_REG;
3656 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
3657 if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3658 return 0;
3660 /* Floating point return values in %st(0), except for local functions when
3661 SSE math is enabled or for functions with sseregparm attribute. */
3662 if ((func || fntype)
3663 && (mode == SFmode || mode == DFmode))
3665 int sse_level = ix86_function_sseregparm (fntype, func);
3666 if ((sse_level >= 1 && mode == SFmode)
3667 || (sse_level == 2 && mode == DFmode))
3668 return FIRST_SSE_REG;
3671 return FIRST_FLOAT_REG;
3674 /* Create the va_list data type. */
3676 static tree
3677 ix86_build_builtin_va_list (void)
3679 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3681 /* For i386 we use plain pointer to argument area. */
3682 if (!TARGET_64BIT)
3683 return build_pointer_type (char_type_node);
3685 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3686 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3688 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3689 unsigned_type_node);
3690 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3691 unsigned_type_node);
3692 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3693 ptr_type_node);
3694 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3695 ptr_type_node);
3697 va_list_gpr_counter_field = f_gpr;
3698 va_list_fpr_counter_field = f_fpr;
3700 DECL_FIELD_CONTEXT (f_gpr) = record;
3701 DECL_FIELD_CONTEXT (f_fpr) = record;
3702 DECL_FIELD_CONTEXT (f_ovf) = record;
3703 DECL_FIELD_CONTEXT (f_sav) = record;
3705 TREE_CHAIN (record) = type_decl;
3706 TYPE_NAME (record) = type_decl;
3707 TYPE_FIELDS (record) = f_gpr;
3708 TREE_CHAIN (f_gpr) = f_fpr;
3709 TREE_CHAIN (f_fpr) = f_ovf;
3710 TREE_CHAIN (f_ovf) = f_sav;
3712 layout_type (record);
3714 /* The correct type is an array type of one element. */
3715 return build_array_type (record, build_index_type (size_zero_node));
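/* For reference, the record built above matches the familiar x86-64
   psABI va_list layout.  A C-level sketch, kept out of the build and
   using a hypothetical typedef name:  */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;	/* byte offset into reg_save_area for GPRs  */
  unsigned int fp_offset;	/* byte offset into reg_save_area for SSE regs  */
  void *overflow_arg_area;	/* next stack-passed argument  */
  void *reg_save_area;		/* register save block set up by the prologue  */
} example_va_list[1];		/* array type of one element, as returned above  */
#endif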
3718 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3720 static void
3721 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3722 tree type, int *pretend_size ATTRIBUTE_UNUSED,
3723 int no_rtl)
3725 CUMULATIVE_ARGS next_cum;
3726 rtx save_area = NULL_RTX, mem;
3727 rtx label;
3728 rtx label_ref;
3729 rtx tmp_reg;
3730 rtx nsse_reg;
3731 int set;
3732 tree fntype;
3733 int stdarg_p;
3734 int i;
3736 if (!TARGET_64BIT)
3737 return;
3739 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3740 return;
3742 /* Indicate to allocate space on the stack for varargs save area. */
3743 ix86_save_varrargs_registers = 1;
3745 cfun->stack_alignment_needed = 128;
3747 fntype = TREE_TYPE (current_function_decl);
3748 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3749 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3750 != void_type_node));
3752 /* For varargs, we do not want to skip the dummy va_dcl argument.
3753 For stdargs, we do want to skip the last named argument. */
3754 next_cum = *cum;
3755 if (stdarg_p)
3756 function_arg_advance (&next_cum, mode, type, 1);
3758 if (!no_rtl)
3759 save_area = frame_pointer_rtx;
3761 set = get_varargs_alias_set ();
3763 for (i = next_cum.regno;
3764 i < ix86_regparm
3765 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3766 i++)
3768 mem = gen_rtx_MEM (Pmode,
3769 plus_constant (save_area, i * UNITS_PER_WORD));
3770 MEM_NOTRAP_P (mem) = 1;
3771 set_mem_alias_set (mem, set);
3772 emit_move_insn (mem, gen_rtx_REG (Pmode,
3773 x86_64_int_parameter_registers[i]));
3776 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3778 /* Now emit code to save SSE registers. The AX parameter contains the
3779 number of SSE parameter registers used to call this function. We use
3780 the sse_prologue_save insn template, which produces a computed jump
3781 across the SSE saves. We need some preparation work to get this working. */
3783 label = gen_label_rtx ();
3784 label_ref = gen_rtx_LABEL_REF (Pmode, label);
3786 /* Compute address to jump to :
3787 label - 5*eax + nnamed_sse_arguments*5 */
3788 tmp_reg = gen_reg_rtx (Pmode);
3789 nsse_reg = gen_reg_rtx (Pmode);
3790 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3791 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3792 gen_rtx_MULT (Pmode, nsse_reg,
3793 GEN_INT (4))));
3794 if (next_cum.sse_regno)
3795 emit_move_insn
3796 (nsse_reg,
3797 gen_rtx_CONST (DImode,
3798 gen_rtx_PLUS (DImode,
3799 label_ref,
3800 GEN_INT (next_cum.sse_regno * 4))));
3801 else
3802 emit_move_insn (nsse_reg, label_ref);
3803 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3805 /* Compute the address of the memory block we save into. We always use a
3806 pointer pointing 127 bytes after the first byte to store - this is
3807 needed to keep the instruction size limited to 4 bytes. */
3808 tmp_reg = gen_reg_rtx (Pmode);
3809 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3810 plus_constant (save_area,
3811 8 * REGPARM_MAX + 127)));
3812 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3813 MEM_NOTRAP_P (mem) = 1;
3814 set_mem_alias_set (mem, set);
3815 set_mem_align (mem, BITS_PER_WORD);
3817 /* And finally do the dirty job! */
3818 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3819 GEN_INT (next_cum.sse_regno), label));
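/* Layout of the varargs register save area set up above, assuming the
   usual 64-bit values REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8 (a
   sketch, offsets in bytes from reg_save_area):
     0 ..  47   the six integer argument registers, 8 bytes each
    48 .. 175   the eight SSE argument registers, 16 bytes each
   The gp_offset and fp_offset fields of the va_list index into this
   block.  */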
3824 /* Implement va_start. */
3826 void
3827 ix86_va_start (tree valist, rtx nextarg)
3829 HOST_WIDE_INT words, n_gpr, n_fpr;
3830 tree f_gpr, f_fpr, f_ovf, f_sav;
3831 tree gpr, fpr, ovf, sav, t;
3833 /* Only 64bit target needs something special. */
3834 if (!TARGET_64BIT)
3836 std_expand_builtin_va_start (valist, nextarg);
3837 return;
3840 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3841 f_fpr = TREE_CHAIN (f_gpr);
3842 f_ovf = TREE_CHAIN (f_fpr);
3843 f_sav = TREE_CHAIN (f_ovf);
3845 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3846 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3847 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3848 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3849 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3851 /* Count number of gp and fp argument registers used. */
3852 words = current_function_args_info.words;
3853 n_gpr = current_function_args_info.regno;
3854 n_fpr = current_function_args_info.sse_regno;
3856 if (TARGET_DEBUG_ARG)
3857 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3858 (int) words, (int) n_gpr, (int) n_fpr);
3860 if (cfun->va_list_gpr_size)
3862 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3863 build_int_cst (NULL_TREE, n_gpr * 8));
3864 TREE_SIDE_EFFECTS (t) = 1;
3865 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3868 if (cfun->va_list_fpr_size)
3870 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3871 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3872 TREE_SIDE_EFFECTS (t) = 1;
3873 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3876 /* Find the overflow area. */
3877 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3878 if (words != 0)
3879 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3880 build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3881 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3882 TREE_SIDE_EFFECTS (t) = 1;
3883 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3885 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3887 /* Find the register save area.
3888 The prologue of the function saves it right above the stack frame. */
3889 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3890 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3891 TREE_SIDE_EFFECTS (t) = 1;
3892 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
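/* Roughly, the trees built above make the x86-64 va_start behave like
   this C sketch ("ap" is the va_list, "cum" stands for
   current_function_args_info):

     ap->gp_offset = cum.regno * 8;
     ap->fp_offset = REGPARM_MAX * 8 + cum.sse_regno * 16;
     ap->overflow_arg_area = incoming_args + cum.words * UNITS_PER_WORD;
     ap->reg_save_area = frame_pointer;  */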
3896 /* Implement va_arg. */
3898 tree
3899 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3901 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3902 tree f_gpr, f_fpr, f_ovf, f_sav;
3903 tree gpr, fpr, ovf, sav, t;
3904 int size, rsize;
3905 tree lab_false, lab_over = NULL_TREE;
3906 tree addr, t2;
3907 rtx container;
3908 int indirect_p = 0;
3909 tree ptrtype;
3910 enum machine_mode nat_mode;
3912 /* Only 64bit target needs something special. */
3913 if (!TARGET_64BIT)
3914 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3916 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3917 f_fpr = TREE_CHAIN (f_gpr);
3918 f_ovf = TREE_CHAIN (f_fpr);
3919 f_sav = TREE_CHAIN (f_ovf);
3921 valist = build_va_arg_indirect_ref (valist);
3922 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3923 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3924 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3925 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3927 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3928 if (indirect_p)
3929 type = build_pointer_type (type);
3930 size = int_size_in_bytes (type);
3931 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3933 nat_mode = type_natural_mode (type);
3934 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3935 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3937 /* Pull the value out of the saved registers. */
3939 addr = create_tmp_var (ptr_type_node, "addr");
3940 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3942 if (container)
3944 int needed_intregs, needed_sseregs;
3945 bool need_temp;
3946 tree int_addr, sse_addr;
3948 lab_false = create_artificial_label ();
3949 lab_over = create_artificial_label ();
3951 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3953 need_temp = (!REG_P (container)
3954 && ((needed_intregs && TYPE_ALIGN (type) > 64)
3955 || TYPE_ALIGN (type) > 128));
3957 /* In case we are passing a structure, verify that it is a consecutive
3958 block in the register save area. If not, we need to do moves. */
3959 if (!need_temp && !REG_P (container))
3961 /* Verify that all registers are strictly consecutive */
3962 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3964 int i;
3966 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3968 rtx slot = XVECEXP (container, 0, i);
3969 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3970 || INTVAL (XEXP (slot, 1)) != i * 16)
3971 need_temp = 1;
3974 else
3976 int i;
3978 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3980 rtx slot = XVECEXP (container, 0, i);
3981 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3982 || INTVAL (XEXP (slot, 1)) != i * 8)
3983 need_temp = 1;
3987 if (!need_temp)
3989 int_addr = addr;
3990 sse_addr = addr;
3992 else
3994 int_addr = create_tmp_var (ptr_type_node, "int_addr");
3995 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3996 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3997 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4000 /* First ensure that we fit completely in registers. */
4001 if (needed_intregs)
4003 t = build_int_cst (TREE_TYPE (gpr),
4004 (REGPARM_MAX - needed_intregs + 1) * 8);
4005 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4006 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4007 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4008 gimplify_and_add (t, pre_p);
4010 if (needed_sseregs)
4012 t = build_int_cst (TREE_TYPE (fpr),
4013 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4014 + REGPARM_MAX * 8);
4015 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4016 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4017 t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4018 gimplify_and_add (t, pre_p);
4021 /* Compute index to start of area used for integer regs. */
4022 if (needed_intregs)
4024 /* int_addr = gpr + sav; */
4025 t = fold_convert (ptr_type_node, gpr);
4026 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4027 t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4028 gimplify_and_add (t, pre_p);
4030 if (needed_sseregs)
4032 /* sse_addr = fpr + sav; */
4033 t = fold_convert (ptr_type_node, fpr);
4034 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4035 t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4036 gimplify_and_add (t, pre_p);
4038 if (need_temp)
4040 int i;
4041 tree temp = create_tmp_var (type, "va_arg_tmp");
4043 /* addr = &temp; */
4044 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4045 t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4046 gimplify_and_add (t, pre_p);
4048 for (i = 0; i < XVECLEN (container, 0); i++)
4050 rtx slot = XVECEXP (container, 0, i);
4051 rtx reg = XEXP (slot, 0);
4052 enum machine_mode mode = GET_MODE (reg);
4053 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4054 tree addr_type = build_pointer_type (piece_type);
4055 tree src_addr, src;
4056 int src_offset;
4057 tree dest_addr, dest;
4059 if (SSE_REGNO_P (REGNO (reg)))
4061 src_addr = sse_addr;
4062 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4064 else
4066 src_addr = int_addr;
4067 src_offset = REGNO (reg) * 8;
4069 src_addr = fold_convert (addr_type, src_addr);
4070 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4071 size_int (src_offset)));
4072 src = build_va_arg_indirect_ref (src_addr);
4074 dest_addr = fold_convert (addr_type, addr);
4075 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4076 size_int (INTVAL (XEXP (slot, 1)))));
4077 dest = build_va_arg_indirect_ref (dest_addr);
4079 t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4080 gimplify_and_add (t, pre_p);
4084 if (needed_intregs)
4086 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4087 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4088 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4089 gimplify_and_add (t, pre_p);
4091 if (needed_sseregs)
4093 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4094 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4095 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4096 gimplify_and_add (t, pre_p);
4099 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4100 gimplify_and_add (t, pre_p);
4102 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4103 append_to_statement_list (t, pre_p);
4106 /* ... otherwise out of the overflow area. */
4108 /* Care for on-stack alignment if needed. */
4109 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4110 || integer_zerop (TYPE_SIZE (type)))
4111 t = ovf;
4112 else
4114 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4115 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4116 build_int_cst (TREE_TYPE (ovf), align - 1));
4117 t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4118 build_int_cst (TREE_TYPE (t), -align));
4120 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4122 t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4123 gimplify_and_add (t2, pre_p);
4125 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4126 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4127 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4128 gimplify_and_add (t, pre_p);
4130 if (container)
4132 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4133 append_to_statement_list (t, pre_p);
4136 ptrtype = build_pointer_type (type);
4137 addr = fold_convert (ptrtype, addr);
4139 if (indirect_p)
4140 addr = build_va_arg_indirect_ref (addr);
4141 return build_va_arg_indirect_ref (addr);
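/* For a plain int on x86-64 the GIMPLE produced above amounts to the
   following sketch (names invented for illustration; the real code uses
   the temporaries created with create_tmp_var above):

     if (ap->gp_offset >= REGPARM_MAX * 8)
       goto stack;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto done;
   stack:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(int *) addr;  */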
4144 /* Return nonzero if OPNUM's MEM should be matched
4145 in movabs* patterns. */
4148 ix86_check_movabs (rtx insn, int opnum)
4150 rtx set, mem;
4152 set = PATTERN (insn);
4153 if (GET_CODE (set) == PARALLEL)
4154 set = XVECEXP (set, 0, 0);
4155 gcc_assert (GET_CODE (set) == SET);
4156 mem = XEXP (set, opnum);
4157 while (GET_CODE (mem) == SUBREG)
4158 mem = SUBREG_REG (mem);
4159 gcc_assert (GET_CODE (mem) == MEM);
4160 return (volatile_ok || !MEM_VOLATILE_P (mem));
4163 /* Initialize the table of extra 80387 mathematical constants. */
4165 static void
4166 init_ext_80387_constants (void)
4168 static const char * cst[5] =
4170 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4171 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4172 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4173 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4174 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4176 int i;
4178 for (i = 0; i < 5; i++)
4180 real_from_string (&ext_80387_constants_table[i], cst[i]);
4181 /* Ensure each constant is rounded to XFmode precision. */
4182 real_convert (&ext_80387_constants_table[i],
4183 XFmode, &ext_80387_constants_table[i]);
4186 ext_80387_constants_init = 1;
4189 /* Return true if the constant is something that can be loaded with
4190 a special instruction. */
4193 standard_80387_constant_p (rtx x)
4195 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4196 return -1;
4198 if (x == CONST0_RTX (GET_MODE (x)))
4199 return 1;
4200 if (x == CONST1_RTX (GET_MODE (x)))
4201 return 2;
4203 /* For XFmode constants, try to find a special 80387 instruction when
4204 optimizing for size or on those CPUs that benefit from them. */
4205 if (GET_MODE (x) == XFmode
4206 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4208 REAL_VALUE_TYPE r;
4209 int i;
4211 if (! ext_80387_constants_init)
4212 init_ext_80387_constants ();
4214 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4215 for (i = 0; i < 5; i++)
4216 if (real_identical (&r, &ext_80387_constants_table[i]))
4217 return i + 3;
4220 return 0;
4223 /* Return the opcode of the special instruction to be used to load
4224 the constant X. */
4226 const char *
4227 standard_80387_constant_opcode (rtx x)
4229 switch (standard_80387_constant_p (x))
4231 case 1:
4232 return "fldz";
4233 case 2:
4234 return "fld1";
4235 case 3:
4236 return "fldlg2";
4237 case 4:
4238 return "fldln2";
4239 case 5:
4240 return "fldl2e";
4241 case 6:
4242 return "fldl2t";
4243 case 7:
4244 return "fldpi";
4245 default:
4246 gcc_unreachable ();
4250 /* Return the CONST_DOUBLE representing the 80387 constant that is
4251 loaded by the specified special instruction. The argument IDX
4252 matches the return value from standard_80387_constant_p. */
4255 standard_80387_constant_rtx (int idx)
4257 int i;
4259 if (! ext_80387_constants_init)
4260 init_ext_80387_constants ();
4262 switch (idx)
4264 case 3:
4265 case 4:
4266 case 5:
4267 case 6:
4268 case 7:
4269 i = idx - 3;
4270 break;
4272 default:
4273 gcc_unreachable ();
4276 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4277 XFmode);
4280 /* Return 1 if X is an FP constant we can load into an SSE register without using memory. */
4283 standard_sse_constant_p (rtx x)
4285 if (x == const0_rtx)
4286 return 1;
4287 return (x == CONST0_RTX (GET_MODE (x)));
4290 /* Returns 1 if OP contains a symbol reference */
4293 symbolic_reference_mentioned_p (rtx op)
4295 const char *fmt;
4296 int i;
4298 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4299 return 1;
4301 fmt = GET_RTX_FORMAT (GET_CODE (op));
4302 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4304 if (fmt[i] == 'E')
4306 int j;
4308 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4309 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4310 return 1;
4313 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4314 return 1;
4317 return 0;
4320 /* Return 1 if it is appropriate to emit `ret' instructions in the
4321 body of a function. Do this only if the epilogue is simple, needing a
4322 couple of insns. Prior to reloading, we can't tell how many registers
4323 must be saved, so return 0 then. Return 0 if there is no frame
4324 marker to de-allocate. */
4327 ix86_can_use_return_insn_p (void)
4329 struct ix86_frame frame;
4331 if (! reload_completed || frame_pointer_needed)
4332 return 0;
4334 /* Don't allow popping more than 32k bytes of arguments, since that's
4335 all we can do with one return instruction. */
4336 if (current_function_pops_args
4337 && current_function_args_size >= 32768)
4338 return 0;
4340 ix86_compute_frame_layout (&frame);
4341 return frame.to_allocate == 0 && frame.nregs == 0;
4344 /* Value should be nonzero if functions must have frame pointers.
4345 Zero means the frame pointer need not be set up (and parms may
4346 be accessed via the stack pointer) in functions that seem suitable. */
4349 ix86_frame_pointer_required (void)
4351 /* If we accessed previous frames, then the generated code expects
4352 to be able to access the saved ebp value in our frame. */
4353 if (cfun->machine->accesses_prev_frame)
4354 return 1;
4356 /* Several x86 OSes need a frame pointer for other reasons,
4357 usually pertaining to setjmp. */
4358 if (SUBTARGET_FRAME_POINTER_REQUIRED)
4359 return 1;
4361 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4362 the frame pointer by default. Turn it back on now if we've not
4363 got a leaf function. */
4364 if (TARGET_OMIT_LEAF_FRAME_POINTER
4365 && (!current_function_is_leaf))
4366 return 1;
4368 if (current_function_profile)
4369 return 1;
4371 return 0;
4374 /* Record that the current function accesses previous call frames. */
4376 void
4377 ix86_setup_frame_addresses (void)
4379 cfun->machine->accesses_prev_frame = 1;
4382 #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
4383 # define USE_HIDDEN_LINKONCE 1
4384 #else
4385 # define USE_HIDDEN_LINKONCE 0
4386 #endif
4388 static int pic_labels_used;
4390 /* Fills in the label name that should be used for a pc thunk for
4391 the given register. */
4393 static void
4394 get_pc_thunk_name (char name[32], unsigned int regno)
4396 if (USE_HIDDEN_LINKONCE)
4397 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4398 else
4399 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4403 /* This function generates code for -fpic that loads %ebx with
4404 the return address of the caller and then returns. */
4406 void
4407 ix86_file_end (void)
4409 rtx xops[2];
4410 int regno;
4412 for (regno = 0; regno < 8; ++regno)
4414 char name[32];
4416 if (! ((pic_labels_used >> regno) & 1))
4417 continue;
4419 get_pc_thunk_name (name, regno);
4421 if (USE_HIDDEN_LINKONCE)
4423 tree decl;
4425 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4426 error_mark_node);
4427 TREE_PUBLIC (decl) = 1;
4428 TREE_STATIC (decl) = 1;
4429 DECL_ONE_ONLY (decl) = 1;
4431 (*targetm.asm_out.unique_section) (decl, 0);
4432 named_section (decl, NULL, 0);
4434 (*targetm.asm_out.globalize_label) (asm_out_file, name);
4435 fputs ("\t.hidden\t", asm_out_file);
4436 assemble_name (asm_out_file, name);
4437 fputc ('\n', asm_out_file);
4438 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4440 else
4442 text_section ();
4443 ASM_OUTPUT_LABEL (asm_out_file, name);
4446 xops[0] = gen_rtx_REG (SImode, regno);
4447 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4448 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4449 output_asm_insn ("ret", xops);
4452 if (NEED_INDICATE_EXEC_STACK)
4453 file_end_indicate_exec_stack ();
4456 /* Emit code for the SET_GOT patterns. */
4458 const char *
4459 output_set_got (rtx dest)
4461 rtx xops[3];
4463 xops[0] = dest;
4464 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4466 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4468 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4470 if (!flag_pic)
4471 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4472 else
4473 output_asm_insn ("call\t%a2", xops);
4475 #if TARGET_MACHO
4476 /* Output the "canonical" label name ("Lxx$pb") here too. This
4477 is what will be referred to by the Mach-O PIC subsystem. */
4478 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4479 #endif
4480 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4481 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4483 if (flag_pic)
4484 output_asm_insn ("pop{l}\t%0", xops);
4486 else
4488 char name[32];
4489 get_pc_thunk_name (name, REGNO (dest));
4490 pic_labels_used |= 1 << REGNO (dest);
4492 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4493 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4494 output_asm_insn ("call\t%X2", xops);
4497 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4498 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4499 else if (!TARGET_MACHO)
4500 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4502 return "";
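/* For a typical ia32 -fpic compilation the function above emits one of
   the following sequences (a sketch assuming %ebx as the destination;
   exact syntax depends on the assembler dialect):

     call	__i686.get_pc_thunk.bx
     addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   or, without TARGET_DEEP_BRANCH_PREDICTION:

     call	.L2
   .L2: popl	%ebx
     addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */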
4505 /* Generate a "push" pattern for input ARG. */
4507 static rtx
4508 gen_push (rtx arg)
4510 return gen_rtx_SET (VOIDmode,
4511 gen_rtx_MEM (Pmode,
4512 gen_rtx_PRE_DEC (Pmode,
4513 stack_pointer_rtx)),
4514 arg);
4517 /* Return >= 0 if there is an unused call-clobbered register available
4518 for the entire function. */
4520 static unsigned int
4521 ix86_select_alt_pic_regnum (void)
4523 if (current_function_is_leaf && !current_function_profile)
4525 int i;
4526 for (i = 2; i >= 0; --i)
4527 if (!regs_ever_live[i])
4528 return i;
4531 return INVALID_REGNUM;
4534 /* Return 1 if we need to save REGNO. */
4535 static int
4536 ix86_save_reg (unsigned int regno, int maybe_eh_return)
4538 if (pic_offset_table_rtx
4539 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4540 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4541 || current_function_profile
4542 || current_function_calls_eh_return
4543 || current_function_uses_const_pool))
4545 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4546 return 0;
4547 return 1;
4550 if (current_function_calls_eh_return && maybe_eh_return)
4552 unsigned i;
4553 for (i = 0; ; i++)
4555 unsigned test = EH_RETURN_DATA_REGNO (i);
4556 if (test == INVALID_REGNUM)
4557 break;
4558 if (test == regno)
4559 return 1;
4563 if (cfun->machine->force_align_arg_pointer
4564 && regno == REGNO (cfun->machine->force_align_arg_pointer))
4565 return 1;
4567 return (regs_ever_live[regno]
4568 && !call_used_regs[regno]
4569 && !fixed_regs[regno]
4570 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4573 /* Return number of registers to be saved on the stack. */
4575 static int
4576 ix86_nsaved_regs (void)
4578 int nregs = 0;
4579 int regno;
4581 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4582 if (ix86_save_reg (regno, true))
4583 nregs++;
4584 return nregs;
4587 /* Return the offset between two registers, one to be eliminated, and the other
4588 its replacement, at the start of a routine. */
4590 HOST_WIDE_INT
4591 ix86_initial_elimination_offset (int from, int to)
4593 struct ix86_frame frame;
4594 ix86_compute_frame_layout (&frame);
4596 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4597 return frame.hard_frame_pointer_offset;
4598 else if (from == FRAME_POINTER_REGNUM
4599 && to == HARD_FRAME_POINTER_REGNUM)
4600 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4601 else
4603 gcc_assert (to == STACK_POINTER_REGNUM);
4605 if (from == ARG_POINTER_REGNUM)
4606 return frame.stack_pointer_offset;
4608 gcc_assert (from == FRAME_POINTER_REGNUM);
4609 return frame.stack_pointer_offset - frame.frame_pointer_offset;
4613 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
4615 static void
4616 ix86_compute_frame_layout (struct ix86_frame *frame)
4618 HOST_WIDE_INT total_size;
4619 unsigned int stack_alignment_needed;
4620 HOST_WIDE_INT offset;
4621 unsigned int preferred_alignment;
4622 HOST_WIDE_INT size = get_frame_size ();
4624 frame->nregs = ix86_nsaved_regs ();
4625 total_size = size;
4627 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4628 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4630 /* During reload iteration the number of registers saved can change.
4631 Recompute the value as needed. Do not recompute when the number of
4632 registers didn't change, as reload does multiple calls to the function
4633 and does not expect the decision to change within a single iteration. */
4634 if (!optimize_size
4635 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4637 int count = frame->nregs;
4639 cfun->machine->use_fast_prologue_epilogue_nregs = count;
4640 /* The fast prologue uses move instead of push to save registers. This
4641 is significantly longer, but also executes faster, as modern hardware
4642 can execute the moves in parallel but can't do that for push/pop.
4644 Be careful about choosing which prologue to emit: when the function
4645 takes many instructions to execute we may use the slow version, as well
4646 as when the function is known to be outside a hot spot (this is known
4647 with feedback only). Weight the size of the function by the number of
4648 registers to save, as it is cheap to use one or two push instructions
4649 but very slow to use many of them. */
4650 if (count)
4651 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4652 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4653 || (flag_branch_probabilities
4654 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4655 cfun->machine->use_fast_prologue_epilogue = false;
4656 else
4657 cfun->machine->use_fast_prologue_epilogue
4658 = !expensive_function_p (count);
4660 if (TARGET_PROLOGUE_USING_MOVE
4661 && cfun->machine->use_fast_prologue_epilogue)
4662 frame->save_regs_using_mov = true;
4663 else
4664 frame->save_regs_using_mov = false;
4667 /* Skip return address and saved base pointer. */
4668 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4670 frame->hard_frame_pointer_offset = offset;
4672 /* Do some sanity checking of stack_alignment_needed and
4673 preferred_alignment, since the i386 port is the only one using those
4674 features, which may break easily. */
4676 gcc_assert (!size || stack_alignment_needed);
4677 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4678 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4679 gcc_assert (stack_alignment_needed
4680 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4682 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4683 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4685 /* Register save area */
4686 offset += frame->nregs * UNITS_PER_WORD;
4688 /* Va-arg area */
4689 if (ix86_save_varrargs_registers)
4691 offset += X86_64_VARARGS_SIZE;
4692 frame->va_arg_size = X86_64_VARARGS_SIZE;
4694 else
4695 frame->va_arg_size = 0;
4697 /* Align start of frame for local function. */
4698 frame->padding1 = ((offset + stack_alignment_needed - 1)
4699 & -stack_alignment_needed) - offset;
4701 offset += frame->padding1;
4703 /* Frame pointer points here. */
4704 frame->frame_pointer_offset = offset;
4706 offset += size;
4708 /* Add the outgoing arguments area. It can be skipped if we eliminated
4709 all the function calls as dead code.
4710 Skipping is however impossible when the function calls alloca. The
4711 alloca expander assumes that the last current_function_outgoing_args_size
4712 bytes of the stack frame are unused. */
4713 if (ACCUMULATE_OUTGOING_ARGS
4714 && (!current_function_is_leaf || current_function_calls_alloca))
4716 offset += current_function_outgoing_args_size;
4717 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4719 else
4720 frame->outgoing_arguments_size = 0;
4722 /* Align stack boundary. Only needed if we're calling another function
4723 or using alloca. */
4724 if (!current_function_is_leaf || current_function_calls_alloca)
4725 frame->padding2 = ((offset + preferred_alignment - 1)
4726 & -preferred_alignment) - offset;
4727 else
4728 frame->padding2 = 0;
4730 offset += frame->padding2;
4732 /* We've reached end of stack frame. */
4733 frame->stack_pointer_offset = offset;
4735 /* Size prologue needs to allocate. */
4736 frame->to_allocate =
4737 (size + frame->padding1 + frame->padding2
4738 + frame->outgoing_arguments_size + frame->va_arg_size);
4740 if ((!frame->to_allocate && frame->nregs <= 1)
4741 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4742 frame->save_regs_using_mov = false;
4744 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4745 && current_function_is_leaf)
4747 frame->red_zone_size = frame->to_allocate;
4748 if (frame->save_regs_using_mov)
4749 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4750 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4751 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4753 else
4754 frame->red_zone_size = 0;
4755 frame->to_allocate -= frame->red_zone_size;
4756 frame->stack_pointer_offset -= frame->red_zone_size;
4757 #if 0
4758 fprintf (stderr, "nregs: %i\n", frame->nregs);
4759 fprintf (stderr, "size: %i\n", size);
4760 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4761 fprintf (stderr, "padding1: %i\n", frame->padding1);
4762 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4763 fprintf (stderr, "padding2: %i\n", frame->padding2);
4764 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4765 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4766 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4767 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4768 frame->hard_frame_pointer_offset);
4769 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4770 #endif
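/* The offsets computed above describe a frame that, from higher to lower
   addresses, looks roughly like this (32-bit, frame pointer in use):

	return address
	saved %ebp			<- hard_frame_pointer_offset
	saved registers			(frame->nregs words)
	va-arg register save area	(frame->va_arg_size)
	padding1
	local variables			<- frame_pointer_offset
	outgoing arguments
	padding2			<- stack_pointer_offset

   to_allocate is roughly everything below the saved registers, i.e. what
   the prologue still has to subtract from the stack pointer, minus any
   red zone usage.  */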
4773 /* Emit code to save registers in the prologue. */
4775 static void
4776 ix86_emit_save_regs (void)
4778 unsigned int regno;
4779 rtx insn;
4781 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
4782 if (ix86_save_reg (regno, true))
4784 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4785 RTX_FRAME_RELATED_P (insn) = 1;
4789 /* Emit code to save registers using MOV insns. The first register
4790 is stored at POINTER + OFFSET. */
4791 static void
4792 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4794 unsigned int regno;
4795 rtx insn;
4797 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4798 if (ix86_save_reg (regno, true))
4800 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4801 Pmode, offset),
4802 gen_rtx_REG (Pmode, regno));
4803 RTX_FRAME_RELATED_P (insn) = 1;
4804 offset += UNITS_PER_WORD;
4808 /* Expand prologue or epilogue stack adjustment.
4809 The pattern exists to put a dependency on all ebp-based memory accesses.
4810 STYLE should be negative if instructions should be marked as frame
4811 related, zero if the %r11 register is live and cannot be freely used,
4812 and positive otherwise. */
4814 static void
4815 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4817 rtx insn;
4819 if (! TARGET_64BIT)
4820 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4821 else if (x86_64_immediate_operand (offset, DImode))
4822 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4823 else
4825 rtx r11;
4826 /* r11 is used by indirect sibcall return as well, set before the
4827 epilogue and used after the epilogue. ATM indirect sibcall
4828 shouldn't be used together with huge frame sizes in one
4829 function because of the frame_size check in sibcall.c. */
4830 gcc_assert (style);
4831 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4832 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4833 if (style < 0)
4834 RTX_FRAME_RELATED_P (insn) = 1;
4835 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4836 offset));
4838 if (style < 0)
4839 RTX_FRAME_RELATED_P (insn) = 1;
4842 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
4844 static rtx
4845 ix86_internal_arg_pointer (void)
4847 if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
4848 && DECL_NAME (current_function_decl)
4849 && MAIN_NAME_P (DECL_NAME (current_function_decl))
4850 && DECL_FILE_SCOPE_P (current_function_decl))
4852 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
4853 return copy_to_reg (cfun->machine->force_align_arg_pointer);
4855 else
4856 return virtual_incoming_args_rtx;
4859 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
4860 This is called from dwarf2out.c to emit call frame instructions
4861 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
4862 static void
4863 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
4865 rtx unspec = SET_SRC (pattern);
4866 gcc_assert (GET_CODE (unspec) == UNSPEC);
4868 switch (index)
4870 case UNSPEC_REG_SAVE:
4871 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
4872 SET_DEST (pattern));
4873 break;
4874 case UNSPEC_DEF_CFA:
4875 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
4876 INTVAL (XVECEXP (unspec, 0, 0)));
4877 break;
4878 default:
4879 gcc_unreachable ();
4883 /* Expand the prologue into a bunch of separate insns. */
4885 void
4886 ix86_expand_prologue (void)
4888 rtx insn;
4889 bool pic_reg_used;
4890 struct ix86_frame frame;
4891 HOST_WIDE_INT allocate;
4893 ix86_compute_frame_layout (&frame);
4895 if (cfun->machine->force_align_arg_pointer)
4897 rtx x, y;
4899 /* Grab the argument pointer. */
4900 x = plus_constant (stack_pointer_rtx, 4);
4901 y = cfun->machine->force_align_arg_pointer;
4902 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
4903 RTX_FRAME_RELATED_P (insn) = 1;
4905 /* The unwind info consists of two parts: install the fafp as the cfa,
4906 and record the fafp as the "save register" of the stack pointer.
4907 The latter is there so that the unwinder can see where it
4908 should restore the stack pointer across the and insn. */
4909 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
4910 x = gen_rtx_SET (VOIDmode, y, x);
4911 RTX_FRAME_RELATED_P (x) = 1;
4912 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
4913 UNSPEC_REG_SAVE);
4914 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
4915 RTX_FRAME_RELATED_P (y) = 1;
4916 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
4917 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
4918 REG_NOTES (insn) = x;
4920 /* Align the stack. */
4921 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
4922 GEN_INT (-16)));
4924 /* And here we cheat like madmen with the unwind info. We force the
4925 cfa register back to sp+4, which is exactly what it was at the
4926 start of the function. Re-pushing the return address results in
4927 the return at the same spot relative to the cfa, and thus is
4928 correct wrt the unwind info. */
4929 x = cfun->machine->force_align_arg_pointer;
4930 x = gen_frame_mem (Pmode, plus_constant (x, -4));
4931 insn = emit_insn (gen_push (x));
4932 RTX_FRAME_RELATED_P (insn) = 1;
4934 x = GEN_INT (4);
4935 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
4936 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
4937 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
4938 REG_NOTES (insn) = x;
4941 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4942 slower on all targets. Also sdb doesn't like it. */
4944 if (frame_pointer_needed)
4946 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4947 RTX_FRAME_RELATED_P (insn) = 1;
4949 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4950 RTX_FRAME_RELATED_P (insn) = 1;
4953 allocate = frame.to_allocate;
4955 if (!frame.save_regs_using_mov)
4956 ix86_emit_save_regs ();
4957 else
4958 allocate += frame.nregs * UNITS_PER_WORD;
4960 /* When using the red zone we may start register saving before allocating
4961 the stack frame, saving one cycle of the prologue. */
4962 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4963 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4964 : stack_pointer_rtx,
4965 -frame.nregs * UNITS_PER_WORD);
4967 if (allocate == 0)
4969 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4970 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4971 GEN_INT (-allocate), -1);
4972 else
4974 /* Only valid for Win32. */
4975 rtx eax = gen_rtx_REG (SImode, 0);
4976 bool eax_live = ix86_eax_live_at_start_p ();
4977 rtx t;
4979 gcc_assert (!TARGET_64BIT);
4981 if (eax_live)
4983 emit_insn (gen_push (eax));
4984 allocate -= 4;
4987 emit_move_insn (eax, GEN_INT (allocate));
4989 insn = emit_insn (gen_allocate_stack_worker (eax));
4990 RTX_FRAME_RELATED_P (insn) = 1;
4991 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4992 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4993 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4994 t, REG_NOTES (insn));
4996 if (eax_live)
4998 if (frame_pointer_needed)
4999 t = plus_constant (hard_frame_pointer_rtx,
5000 allocate
5001 - frame.to_allocate
5002 - frame.nregs * UNITS_PER_WORD);
5003 else
5004 t = plus_constant (stack_pointer_rtx, allocate);
5005 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5009 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5011 if (!frame_pointer_needed || !frame.to_allocate)
5012 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5013 else
5014 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5015 -frame.nregs * UNITS_PER_WORD);
5018 pic_reg_used = false;
5019 if (pic_offset_table_rtx
5020 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5021 || current_function_profile))
5023 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5025 if (alt_pic_reg_used != INVALID_REGNUM)
5026 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5028 pic_reg_used = true;
5031 if (pic_reg_used)
5033 if (TARGET_64BIT)
5034 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5035 else
5036 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5038 /* Even with accurate pre-reload life analysis, we can wind up
5039 deleting all references to the pic register after reload.
5040 Consider if cross-jumping unifies two sides of a branch
5041 controlled by a comparison vs the only read from a global.
5042 In which case, allow the set_got to be deleted, though we're
5043 too late to do anything about the ebx save in the prologue. */
5044 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5047 /* Prevent function calls from being scheduled before the call to mcount.
5048 In the pic_reg_used case, make sure that the got load isn't deleted. */
5049 if (current_function_profile)
5050 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5053 /* Emit code to restore saved registers using MOV insns. First register
5054 is restored from POINTER + OFFSET. */
5055 static void
5056 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5057 int maybe_eh_return)
5059 int regno;
5060 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5063 if (ix86_save_reg (regno, maybe_eh_return))
5065 /* Ensure that adjust_address won't be forced to produce pointer
5066 out of range allowed by x86-64 instruction set. */
5067 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5069 rtx r11;
5071 r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5072 emit_move_insn (r11, GEN_INT (offset));
5073 emit_insn (gen_adddi3 (r11, r11, pointer));
5074 base_address = gen_rtx_MEM (Pmode, r11);
5075 offset = 0;
5077 emit_move_insn (gen_rtx_REG (Pmode, regno),
5078 adjust_address (base_address, Pmode, offset));
5079 offset += UNITS_PER_WORD;
5083 /* Restore function stack, frame, and registers. */
5085 void
5086 ix86_expand_epilogue (int style)
5088 int regno;
5089 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5090 struct ix86_frame frame;
5091 HOST_WIDE_INT offset;
5093 ix86_compute_frame_layout (&frame);
5095 /* Calculate start of saved registers relative to ebp. Special care
5096 must be taken for the normal return case of a function using
5097 eh_return: the eax and edx registers are marked as saved, but not
5098 restored along this path. */
5099 offset = frame.nregs;
5100 if (current_function_calls_eh_return && style != 2)
5101 offset -= 2;
5102 offset *= -UNITS_PER_WORD;
5104 /* If we're only restoring one register and sp is not valid, then
5105 use a move instruction to restore the register, since it's
5106 less work than reloading sp and popping the register.
5108 The default code results in a stack adjustment using an add/lea
5109 instruction, while this code results in a LEAVE instruction (or discrete
5110 equivalent), so it is profitable in some other cases as well, especially
5111 when there are no registers to restore. We also use this code when
5112 TARGET_USE_LEAVE and there is exactly one register to pop. This
5113 heuristic may need some tuning in the future. */
5114 if ((!sp_valid && frame.nregs <= 1)
5115 || (TARGET_EPILOGUE_USING_MOVE
5116 && cfun->machine->use_fast_prologue_epilogue
5117 && (frame.nregs > 1 || frame.to_allocate))
5118 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5119 || (frame_pointer_needed && TARGET_USE_LEAVE
5120 && cfun->machine->use_fast_prologue_epilogue
5121 && frame.nregs == 1)
5122 || current_function_calls_eh_return)
5124 /* Restore registers. We can use ebp or esp to address the memory
5125 locations. If both are available, default to ebp, since offsets
5126 are known to be small. The only exception is esp pointing directly
5127 to the end of the block of saved registers, where we may simplify
5128 the addressing mode. */
5130 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5131 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5132 frame.to_allocate, style == 2);
5133 else
5134 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5135 offset, style == 2);
5137 /* eh_return epilogues need %ecx added to the stack pointer. */
5138 if (style == 2)
5140 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5142 if (frame_pointer_needed)
5144 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5145 tmp = plus_constant (tmp, UNITS_PER_WORD);
5146 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5148 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5149 emit_move_insn (hard_frame_pointer_rtx, tmp);
5151 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5152 const0_rtx, style);
5154 else
5156 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5157 tmp = plus_constant (tmp, (frame.to_allocate
5158 + frame.nregs * UNITS_PER_WORD));
5159 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5162 else if (!frame_pointer_needed)
5163 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5164 GEN_INT (frame.to_allocate
5165 + frame.nregs * UNITS_PER_WORD),
5166 style);
5167 /* If not an i386, mov & pop is faster than "leave". */
5168 else if (TARGET_USE_LEAVE || optimize_size
5169 || !cfun->machine->use_fast_prologue_epilogue)
5170 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5171 else
5173 pro_epilogue_adjust_stack (stack_pointer_rtx,
5174 hard_frame_pointer_rtx,
5175 const0_rtx, style);
5176 if (TARGET_64BIT)
5177 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5178 else
5179 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5182 else
5184 /* First step is to deallocate the stack frame so that we can
5185 pop the registers. */
5186 if (!sp_valid)
5188 gcc_assert (frame_pointer_needed);
5189 pro_epilogue_adjust_stack (stack_pointer_rtx,
5190 hard_frame_pointer_rtx,
5191 GEN_INT (offset), style);
5193 else if (frame.to_allocate)
5194 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5195 GEN_INT (frame.to_allocate), style);
5197 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5198 if (ix86_save_reg (regno, false))
5200 if (TARGET_64BIT)
5201 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5202 else
5203 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5205 if (frame_pointer_needed)
5207 /* Leave results in shorter dependency chains on CPUs that are
5208 able to grok it fast. */
5209 if (TARGET_USE_LEAVE)
5210 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5211 else if (TARGET_64BIT)
5212 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5213 else
5214 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5218 if (cfun->machine->force_align_arg_pointer)
5220 emit_insn (gen_addsi3 (stack_pointer_rtx,
5221 cfun->machine->force_align_arg_pointer,
5222 GEN_INT (-4)));
5225 /* Sibcall epilogues don't want a return instruction. */
5226 if (style == 0)
5227 return;
5229 if (current_function_pops_args && current_function_args_size)
5231 rtx popc = GEN_INT (current_function_pops_args);
5233 /* i386 can only pop 64K bytes. If asked to pop more, pop
5234 return address, do explicit add, and jump indirectly to the
5235 caller. */
5237 if (current_function_pops_args >= 65536)
5239 rtx ecx = gen_rtx_REG (SImode, 2);
5241 /* There is no "pascal" calling convention in 64bit ABI. */
5242 gcc_assert (!TARGET_64BIT);
5244 emit_insn (gen_popsi1 (ecx));
5245 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5246 emit_jump_insn (gen_return_indirect_internal (ecx));
5248 else
5249 emit_jump_insn (gen_return_pop_internal (popc));
5251 else
5252 emit_jump_insn (gen_return_internal ());
5255 /* Reset from the function's potential modifications. */
5257 static void
5258 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5259 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5261 if (pic_offset_table_rtx)
5262 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5265 /* Extract the parts of an RTL expression that is a valid memory address
5266 for an instruction. Return 0 if the structure of the address is
5267 grossly off. Return -1 if the address contains ASHIFT, so it is not
5268 strictly valid, but still used for computing length of lea instruction. */
5271 ix86_decompose_address (rtx addr, struct ix86_address *out)
5273 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5274 rtx base_reg, index_reg;
5275 HOST_WIDE_INT scale = 1;
5276 rtx scale_rtx = NULL_RTX;
5277 int retval = 1;
5278 enum ix86_address_seg seg = SEG_DEFAULT;
5280 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5281 base = addr;
5282 else if (GET_CODE (addr) == PLUS)
5284 rtx addends[4], op;
5285 int n = 0, i;
5287 op = addr;
5290 if (n >= 4)
5291 return 0;
5292 addends[n++] = XEXP (op, 1);
5293 op = XEXP (op, 0);
5295 while (GET_CODE (op) == PLUS);
5296 if (n >= 4)
5297 return 0;
5298 addends[n] = op;
5300 for (i = n; i >= 0; --i)
5302 op = addends[i];
5303 switch (GET_CODE (op))
5305 case MULT:
5306 if (index)
5307 return 0;
5308 index = XEXP (op, 0);
5309 scale_rtx = XEXP (op, 1);
5310 break;
5312 case UNSPEC:
5313 if (XINT (op, 1) == UNSPEC_TP
5314 && TARGET_TLS_DIRECT_SEG_REFS
5315 && seg == SEG_DEFAULT)
5316 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5317 else
5318 return 0;
5319 break;
5321 case REG:
5322 case SUBREG:
5323 if (!base)
5324 base = op;
5325 else if (!index)
5326 index = op;
5327 else
5328 return 0;
5329 break;
5331 case CONST:
5332 case CONST_INT:
5333 case SYMBOL_REF:
5334 case LABEL_REF:
5335 if (disp)
5336 return 0;
5337 disp = op;
5338 break;
5340 default:
5341 return 0;
5345 else if (GET_CODE (addr) == MULT)
5347 index = XEXP (addr, 0); /* index*scale */
5348 scale_rtx = XEXP (addr, 1);
5350 else if (GET_CODE (addr) == ASHIFT)
5352 rtx tmp;
5354 /* We're called for lea too, which implements ashift on occasion. */
5355 index = XEXP (addr, 0);
5356 tmp = XEXP (addr, 1);
5357 if (GET_CODE (tmp) != CONST_INT)
5358 return 0;
5359 scale = INTVAL (tmp);
5360 if ((unsigned HOST_WIDE_INT) scale > 3)
5361 return 0;
5362 scale = 1 << scale;
5363 retval = -1;
5365 else
5366 disp = addr; /* displacement */
5368 /* Extract the integral value of scale. */
5369 if (scale_rtx)
5371 if (GET_CODE (scale_rtx) != CONST_INT)
5372 return 0;
5373 scale = INTVAL (scale_rtx);
5376 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5377 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5379 /* Allow arg pointer and stack pointer as index if there is no scaling. */
5380 if (base_reg && index_reg && scale == 1
5381 && (index_reg == arg_pointer_rtx
5382 || index_reg == frame_pointer_rtx
5383 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5385 rtx tmp;
5386 tmp = base, base = index, index = tmp;
5387 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5390 /* Special case: %ebp cannot be encoded as a base without a displacement. */
5391 if ((base_reg == hard_frame_pointer_rtx
5392 || base_reg == frame_pointer_rtx
5393 || base_reg == arg_pointer_rtx) && !disp)
5394 disp = const0_rtx;
5396 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5397 Avoid this by transforming it to [%esi+0]. */
5398 if (ix86_tune == PROCESSOR_K6 && !optimize_size
5399 && base_reg && !index_reg && !disp
5400 && REG_P (base_reg)
5401 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5402 disp = const0_rtx;
5404 /* Special case: encode reg+reg instead of reg*2. */
5405 if (!base && index && scale && scale == 2)
5406 base = index, base_reg = index_reg, scale = 1;
5408 /* Special case: scaling cannot be encoded without base or displacement. */
5409 if (!base && !disp && index && scale != 1)
5410 disp = const0_rtx;
5412 out->base = base;
5413 out->index = index;
5414 out->disp = disp;
5415 out->scale = scale;
5416 out->seg = seg;
5418 return retval;
5421 /* Return the cost of the memory address x.
5422 For i386, it is better to use a complex address than let gcc copy
5423 the address into a reg and make a new pseudo. But not if the address
5424 requires two regs - that would mean more pseudos with longer
5425 lifetimes. */
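/* As a rough illustration of the heuristics below: a hard-register address
   such as 4(%ebx) costs 0 (the starting cost of 1 minus 1 for the nonzero
   displacement), while an address formed from two distinct pseudo
   registers, e.g. (reg1,reg2), costs 1 + 1 + 1 = 3.  */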
5426 static int
5427 ix86_address_cost (rtx x)
5429 struct ix86_address parts;
5430 int cost = 1;
5431 int ok = ix86_decompose_address (x, &parts);
5433 gcc_assert (ok);
5435 if (parts.base && GET_CODE (parts.base) == SUBREG)
5436 parts.base = SUBREG_REG (parts.base);
5437 if (parts.index && GET_CODE (parts.index) == SUBREG)
5438 parts.index = SUBREG_REG (parts.index);
5440 /* More complex memory references are better. */
5441 if (parts.disp && parts.disp != const0_rtx)
5442 cost--;
5443 if (parts.seg != SEG_DEFAULT)
5444 cost--;
5446 /* Attempt to minimize number of registers in the address. */
5447 if ((parts.base
5448 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5449 || (parts.index
5450 && (!REG_P (parts.index)
5451 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5452 cost++;
5454 if (parts.base
5455 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5456 && parts.index
5457 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5458 && parts.base != parts.index)
5459 cost++;
5461 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5462 since its predecode logic can't detect the length of such instructions
5463 and decoding degenerates to the vector decoder. Increase the cost of such
5464 addresses here. The penalty is at least 2 cycles. It may be worthwhile
5465 to split such addresses or even to refuse such addresses at all.
5467 The following addressing modes are affected:
5468 [base+scale*index]
5469 [scale*index+disp]
5470 [base+index]
5472 The first and last cases may be avoidable by explicitly coding the zero in
5473 the memory address, but I don't have an AMD-K6 machine handy to check this
5474 theory. */
5476 if (TARGET_K6
5477 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5478 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5479 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5480 cost += 10;
5482 return cost;
5485 /* If X is a machine specific address (i.e. a symbol or label being
5486 referenced as a displacement from the GOT implemented using an
5487 UNSPEC), then return the base term. Otherwise return X. */
5490 ix86_find_base_term (rtx x)
5492 rtx term;
5494 if (TARGET_64BIT)
5496 if (GET_CODE (x) != CONST)
5497 return x;
5498 term = XEXP (x, 0);
5499 if (GET_CODE (term) == PLUS
5500 && (GET_CODE (XEXP (term, 1)) == CONST_INT
5501 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5502 term = XEXP (term, 0);
5503 if (GET_CODE (term) != UNSPEC
5504 || XINT (term, 1) != UNSPEC_GOTPCREL)
5505 return x;
5507 term = XVECEXP (term, 0, 0);
5509 if (GET_CODE (term) != SYMBOL_REF
5510 && GET_CODE (term) != LABEL_REF)
5511 return x;
5513 return term;
5516 term = ix86_delegitimize_address (x);
5518 if (GET_CODE (term) != SYMBOL_REF
5519 && GET_CODE (term) != LABEL_REF)
5520 return x;
5522 return term;
5525 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
5526 this is used to form addresses to local data when -fPIC is in
5527 use. */
5529 static bool
5530 darwin_local_data_pic (rtx disp)
5532 if (GET_CODE (disp) == MINUS)
5534 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5535 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5536 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5538 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5539 if (! strcmp (sym_name, "<pic base>"))
5540 return true;
5544 return false;
5547 /* Determine if a given RTX is a valid constant. We already know this
5548 satisfies CONSTANT_P. */
5550 bool
5551 legitimate_constant_p (rtx x)
5553 switch (GET_CODE (x))
5555 case CONST:
5556 x = XEXP (x, 0);
5558 if (GET_CODE (x) == PLUS)
5560 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5561 return false;
5562 x = XEXP (x, 0);
5565 if (TARGET_MACHO && darwin_local_data_pic (x))
5566 return true;
5568 /* Only some unspecs are valid as "constants". */
5569 if (GET_CODE (x) == UNSPEC)
5570 switch (XINT (x, 1))
5572 case UNSPEC_GOTOFF:
5573 return TARGET_64BIT;
5574 case UNSPEC_TPOFF:
5575 case UNSPEC_NTPOFF:
5576 x = XVECEXP (x, 0, 0);
5577 return (GET_CODE (x) == SYMBOL_REF
5578 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5579 case UNSPEC_DTPOFF:
5580 x = XVECEXP (x, 0, 0);
5581 return (GET_CODE (x) == SYMBOL_REF
5582 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5583 default:
5584 return false;
5587 /* We must have drilled down to a symbol. */
5588 if (GET_CODE (x) == LABEL_REF)
5589 return true;
5590 if (GET_CODE (x) != SYMBOL_REF)
5591 return false;
5592 /* FALLTHRU */
5594 case SYMBOL_REF:
5595 /* TLS symbols are never valid. */
5596 if (SYMBOL_REF_TLS_MODEL (x))
5597 return false;
5598 break;
5600 case CONST_DOUBLE:
5601 if (GET_MODE (x) == TImode
5602 && x != CONST0_RTX (TImode)
5603 && !TARGET_64BIT)
5604 return false;
5605 break;
5607 case CONST_VECTOR:
5608 if (x == CONST0_RTX (GET_MODE (x)))
5609 return true;
5610 return false;
5612 default:
5613 break;
5616 /* Otherwise we handle everything else in the move patterns. */
5617 return true;
5620 /* Determine if it's legal to put X into the constant pool. This
5621 is not possible for the address of thread-local symbols, which
5622 is checked above. */
5624 static bool
5625 ix86_cannot_force_const_mem (rtx x)
5627 /* We can always put integral constants and vectors in memory. */
5628 switch (GET_CODE (x))
5630 case CONST_INT:
5631 case CONST_DOUBLE:
5632 case CONST_VECTOR:
5633 return false;
5635 default:
5636 break;
5638 return !legitimate_constant_p (x);
5641 /* Determine if a given RTX is a valid constant address. */
5643 bool
5644 constant_address_p (rtx x)
5646 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5649 /* Nonzero if the constant value X is a legitimate general operand
5650 when generating PIC code. It is given that flag_pic is on and
5651 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5653 bool
5654 legitimate_pic_operand_p (rtx x)
5656 rtx inner;
5658 switch (GET_CODE (x))
5660 case CONST:
5661 inner = XEXP (x, 0);
5662 if (GET_CODE (inner) == PLUS
5663 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5664 inner = XEXP (inner, 0);
5666 /* Only some unspecs are valid as "constants". */
5667 if (GET_CODE (inner) == UNSPEC)
5668 switch (XINT (inner, 1))
5670 case UNSPEC_GOTOFF:
5671 return TARGET_64BIT;
5672 case UNSPEC_TPOFF:
5673 x = XVECEXP (inner, 0, 0);
5674 return (GET_CODE (x) == SYMBOL_REF
5675 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5676 default:
5677 return false;
5679 /* FALLTHRU */
5681 case SYMBOL_REF:
5682 case LABEL_REF:
5683 return legitimate_pic_address_disp_p (x);
5685 default:
5686 return true;
5690 /* Determine if a given CONST RTX is a valid memory displacement
5691 in PIC mode. */
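/* For example, in 32bit PIC code a local symbol is a valid displacement
   as (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)), i.e. x@GOTOFF,
   while a global symbol must be wrapped as
   (const (unspec [(symbol_ref "x")] UNSPEC_GOT)), i.e. x@GOT.  */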
5694 legitimate_pic_address_disp_p (rtx disp)
5696 bool saw_plus;
5698 /* In 64bit mode we can allow direct addresses of symbols and labels
5699 when they are not dynamic symbols. */
5700 if (TARGET_64BIT)
5702 rtx op0 = disp, op1;
5704 switch (GET_CODE (disp))
5706 case LABEL_REF:
5707 return true;
5709 case CONST:
5710 if (GET_CODE (XEXP (disp, 0)) != PLUS)
5711 break;
5712 op0 = XEXP (XEXP (disp, 0), 0);
5713 op1 = XEXP (XEXP (disp, 0), 1);
5714 if (GET_CODE (op1) != CONST_INT
5715 || INTVAL (op1) >= 16*1024*1024
5716 || INTVAL (op1) < -16*1024*1024)
5717 break;
5718 if (GET_CODE (op0) == LABEL_REF)
5719 return true;
5720 if (GET_CODE (op0) != SYMBOL_REF)
5721 break;
5722 /* FALLTHRU */
5724 case SYMBOL_REF:
5725 /* TLS references should always be enclosed in UNSPEC. */
5726 if (SYMBOL_REF_TLS_MODEL (op0))
5727 return false;
5728 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
5729 return true;
5730 break;
5732 default:
5733 break;
5736 if (GET_CODE (disp) != CONST)
5737 return 0;
5738 disp = XEXP (disp, 0);
5740 if (TARGET_64BIT)
5742 /* It is unsafe to allow PLUS expressions here; that would limit the allowed
5743 distance of GOT tables. We should not need these anyway. */
5744 if (GET_CODE (disp) != UNSPEC
5745 || (XINT (disp, 1) != UNSPEC_GOTPCREL
5746 && XINT (disp, 1) != UNSPEC_GOTOFF))
5747 return 0;
5749 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5750 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5751 return 0;
5752 return 1;
5755 saw_plus = false;
5756 if (GET_CODE (disp) == PLUS)
5758 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5759 return 0;
5760 disp = XEXP (disp, 0);
5761 saw_plus = true;
5764 if (TARGET_MACHO && darwin_local_data_pic (disp))
5765 return 1;
5767 if (GET_CODE (disp) != UNSPEC)
5768 return 0;
5770 switch (XINT (disp, 1))
5772 case UNSPEC_GOT:
5773 if (saw_plus)
5774 return false;
5775 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5776 case UNSPEC_GOTOFF:
5777 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
5778 While the ABI also specifies a 32bit relocation, we don't produce it in
5779 the small PIC model at all. */
5780 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5781 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5782 && !TARGET_64BIT)
5783 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5784 return false;
5785 case UNSPEC_GOTTPOFF:
5786 case UNSPEC_GOTNTPOFF:
5787 case UNSPEC_INDNTPOFF:
5788 if (saw_plus)
5789 return false;
5790 disp = XVECEXP (disp, 0, 0);
5791 return (GET_CODE (disp) == SYMBOL_REF
5792 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
5793 case UNSPEC_NTPOFF:
5794 disp = XVECEXP (disp, 0, 0);
5795 return (GET_CODE (disp) == SYMBOL_REF
5796 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
5797 case UNSPEC_DTPOFF:
5798 disp = XVECEXP (disp, 0, 0);
5799 return (GET_CODE (disp) == SYMBOL_REF
5800 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
5803 return 0;
5806 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5807 memory address for an instruction. The MODE argument is the machine mode
5808 for the MEM expression that wants to use this address.
5810 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5811 convert common non-canonical forms to canonical form so that they will
5812 be recognized. */
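/* The general form accepted is base + index*scale + disp, with scale
   restricted to 1, 2, 4 or 8 and disp a legitimate constant;
   e.g. 12(%ebp,%ecx,4) passes, while an address scaled by 3 is rejected.  */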
5815 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5817 struct ix86_address parts;
5818 rtx base, index, disp;
5819 HOST_WIDE_INT scale;
5820 const char *reason = NULL;
5821 rtx reason_rtx = NULL_RTX;
5823 if (TARGET_DEBUG_ADDR)
5825 fprintf (stderr,
5826 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5827 GET_MODE_NAME (mode), strict);
5828 debug_rtx (addr);
5831 if (ix86_decompose_address (addr, &parts) <= 0)
5833 reason = "decomposition failed";
5834 goto report_error;
5837 base = parts.base;
5838 index = parts.index;
5839 disp = parts.disp;
5840 scale = parts.scale;
5842 /* Validate base register.
5844 Don't allow SUBREG's that span more than a word here. It can lead to spill
5845 failures when the base is one word out of a two word structure, which is
5846 represented internally as a DImode int. */
5848 if (base)
5850 rtx reg;
5851 reason_rtx = base;
5853 if (REG_P (base))
5854 reg = base;
5855 else if (GET_CODE (base) == SUBREG
5856 && REG_P (SUBREG_REG (base))
5857 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5858 <= UNITS_PER_WORD)
5859 reg = SUBREG_REG (base);
5860 else
5862 reason = "base is not a register";
5863 goto report_error;
5866 if (GET_MODE (base) != Pmode)
5868 reason = "base is not in Pmode";
5869 goto report_error;
5872 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5873 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5875 reason = "base is not valid";
5876 goto report_error;
5880 /* Validate index register.
5882 Don't allow SUBREG's that span more than a word here -- same as above. */
5884 if (index)
5886 rtx reg;
5887 reason_rtx = index;
5889 if (REG_P (index))
5890 reg = index;
5891 else if (GET_CODE (index) == SUBREG
5892 && REG_P (SUBREG_REG (index))
5893 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5894 <= UNITS_PER_WORD)
5895 reg = SUBREG_REG (index);
5896 else
5898 reason = "index is not a register";
5899 goto report_error;
5902 if (GET_MODE (index) != Pmode)
5904 reason = "index is not in Pmode";
5905 goto report_error;
5908 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5909 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5911 reason = "index is not valid";
5912 goto report_error;
5916 /* Validate scale factor. */
5917 if (scale != 1)
5919 reason_rtx = GEN_INT (scale);
5920 if (!index)
5922 reason = "scale without index";
5923 goto report_error;
5926 if (scale != 2 && scale != 4 && scale != 8)
5928 reason = "scale is not a valid multiplier";
5929 goto report_error;
5933 /* Validate displacement. */
5934 if (disp)
5936 reason_rtx = disp;
5938 if (GET_CODE (disp) == CONST
5939 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5940 switch (XINT (XEXP (disp, 0), 1))
5942 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
5943 used. While the ABI also specifies 32bit relocations, we don't produce
5944 them at all and use IP-relative addressing instead. */
5945 case UNSPEC_GOT:
5946 case UNSPEC_GOTOFF:
5947 gcc_assert (flag_pic);
5948 if (!TARGET_64BIT)
5949 goto is_legitimate_pic;
5950 reason = "64bit address unspec";
5951 goto report_error;
5953 case UNSPEC_GOTPCREL:
5954 gcc_assert (flag_pic);
5955 goto is_legitimate_pic;
5957 case UNSPEC_GOTTPOFF:
5958 case UNSPEC_GOTNTPOFF:
5959 case UNSPEC_INDNTPOFF:
5960 case UNSPEC_NTPOFF:
5961 case UNSPEC_DTPOFF:
5962 break;
5964 default:
5965 reason = "invalid address unspec";
5966 goto report_error;
5969 else if (flag_pic && (SYMBOLIC_CONST (disp)
5970 #if TARGET_MACHO
5971 && !machopic_operand_p (disp)
5972 #endif
5975 is_legitimate_pic:
5976 if (TARGET_64BIT && (index || base))
5978 /* foo@dtpoff(%rX) is ok. */
5979 if (GET_CODE (disp) != CONST
5980 || GET_CODE (XEXP (disp, 0)) != PLUS
5981 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5982 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5983 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5984 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5986 reason = "non-constant pic memory reference";
5987 goto report_error;
5990 else if (! legitimate_pic_address_disp_p (disp))
5992 reason = "displacement is an invalid pic construct";
5993 goto report_error;
5996 /* This code used to verify that a symbolic pic displacement
5997 includes the pic_offset_table_rtx register.
5999 While this is a good idea, unfortunately these constructs may
6000 be created by the "adds using lea" optimization for incorrect
6001 code like:
6003 int a;
6004 int foo(int i)
6006 return *(&a+i);
6009 This code is nonsensical, but results in addressing the
6010 GOT table with a pic_offset_table_rtx base. We can't
6011 just refuse it easily, since it gets matched by the
6012 "addsi3" pattern, which later gets split to lea in the
6013 case that the output register differs from the input. While this
6014 could be handled by a separate addsi pattern for this case
6015 that never results in lea, disabling this test seems to be
6016 the easier and correct fix for the crash. */
6018 else if (GET_CODE (disp) != LABEL_REF
6019 && GET_CODE (disp) != CONST_INT
6020 && (GET_CODE (disp) != CONST
6021 || !legitimate_constant_p (disp))
6022 && (GET_CODE (disp) != SYMBOL_REF
6023 || !legitimate_constant_p (disp)))
6025 reason = "displacement is not constant";
6026 goto report_error;
6028 else if (TARGET_64BIT
6029 && !x86_64_immediate_operand (disp, VOIDmode))
6031 reason = "displacement is out of range";
6032 goto report_error;
6036 /* Everything looks valid. */
6037 if (TARGET_DEBUG_ADDR)
6038 fprintf (stderr, "Success.\n");
6039 return TRUE;
6041 report_error:
6042 if (TARGET_DEBUG_ADDR)
6044 fprintf (stderr, "Error: %s\n", reason);
6045 debug_rtx (reason_rtx);
6047 return FALSE;
6050 /* Return a unique alias set for the GOT. */
6052 static HOST_WIDE_INT
6053 ix86_GOT_alias_set (void)
6055 static HOST_WIDE_INT set = -1;
6056 if (set == -1)
6057 set = new_alias_set ();
6058 return set;
6061 /* Return a legitimate reference for ORIG (an address) using the
6062 register REG. If REG is 0, a new pseudo is generated.
6064 There are two types of references that must be handled:
6066 1. Global data references must load the address from the GOT, via
6067 the PIC reg. An insn is emitted to do this load, and the reg is
6068 returned.
6070 2. Static data references, constant pool addresses, and code labels
6071 compute the address as an offset from the GOT, whose base is in
6072 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6073 differentiate them from global data objects. The returned
6074 address is the PIC reg + an unspec constant.
6076 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6077 reg also appears in the address. */
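/* Concretely, in 32bit PIC code a global symbol "foo" becomes a load from
   foo@GOT(%ebx), i.e.
   (mem (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOT)))),
   whereas local data is addressed directly as
   (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF))).  */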
6079 static rtx
6080 legitimize_pic_address (rtx orig, rtx reg)
6082 rtx addr = orig;
6083 rtx new = orig;
6084 rtx base;
6086 #if TARGET_MACHO
6087 if (reg == 0)
6088 reg = gen_reg_rtx (Pmode);
6089 /* Use the generic Mach-O PIC machinery. */
6090 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6091 #endif
6093 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6094 new = addr;
6095 else if (TARGET_64BIT
6096 && ix86_cmodel != CM_SMALL_PIC
6097 && local_symbolic_operand (addr, Pmode))
6099 rtx tmpreg;
6100 /* This symbol may be referenced via a displacement from the PIC
6101 base address (@GOTOFF). */
6103 if (reload_in_progress)
6104 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6105 if (GET_CODE (addr) == CONST)
6106 addr = XEXP (addr, 0);
6107 if (GET_CODE (addr) == PLUS)
6109 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6110 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6112 else
6113 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6114 new = gen_rtx_CONST (Pmode, new);
6115 if (!reg)
6116 tmpreg = gen_reg_rtx (Pmode);
6117 else
6118 tmpreg = reg;
6119 emit_move_insn (tmpreg, new);
6121 if (reg != 0)
6123 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6124 tmpreg, 1, OPTAB_DIRECT);
6125 new = reg;
6127 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6129 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6131 /* This symbol may be referenced via a displacement from the PIC
6132 base address (@GOTOFF). */
6134 if (reload_in_progress)
6135 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6136 if (GET_CODE (addr) == CONST)
6137 addr = XEXP (addr, 0);
6138 if (GET_CODE (addr) == PLUS)
6140 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6141 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6143 else
6144 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6145 new = gen_rtx_CONST (Pmode, new);
6146 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6148 if (reg != 0)
6150 emit_move_insn (reg, new);
6151 new = reg;
6154 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6156 if (TARGET_64BIT)
6158 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6159 new = gen_rtx_CONST (Pmode, new);
6160 new = gen_const_mem (Pmode, new);
6161 set_mem_alias_set (new, ix86_GOT_alias_set ());
6163 if (reg == 0)
6164 reg = gen_reg_rtx (Pmode);
6165 /* Use gen_movsi directly, otherwise the address is loaded
6166 into a register for CSE. We don't want to CSE these addresses;
6167 instead we CSE addresses from the GOT table, so skip this. */
6168 emit_insn (gen_movsi (reg, new));
6169 new = reg;
6171 else
6173 /* This symbol must be referenced via a load from the
6174 Global Offset Table (@GOT). */
6176 if (reload_in_progress)
6177 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6178 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6179 new = gen_rtx_CONST (Pmode, new);
6180 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6181 new = gen_const_mem (Pmode, new);
6182 set_mem_alias_set (new, ix86_GOT_alias_set ());
6184 if (reg == 0)
6185 reg = gen_reg_rtx (Pmode);
6186 emit_move_insn (reg, new);
6187 new = reg;
6190 else
6192 if (GET_CODE (addr) == CONST_INT
6193 && !x86_64_immediate_operand (addr, VOIDmode))
6195 if (reg)
6197 emit_move_insn (reg, addr);
6198 new = reg;
6200 else
6201 new = force_reg (Pmode, addr);
6203 else if (GET_CODE (addr) == CONST)
6205 addr = XEXP (addr, 0);
6207 /* We must match stuff we generate before. Assume the only
6208 unspecs that can get here are ours. Not that we could do
6209 anything with them anyway.... */
6210 if (GET_CODE (addr) == UNSPEC
6211 || (GET_CODE (addr) == PLUS
6212 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6213 return orig;
6214 gcc_assert (GET_CODE (addr) == PLUS);
6216 if (GET_CODE (addr) == PLUS)
6218 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6220 /* Check first to see if this is a constant offset from a @GOTOFF
6221 symbol reference. */
6222 if (local_symbolic_operand (op0, Pmode)
6223 && GET_CODE (op1) == CONST_INT)
6225 if (!TARGET_64BIT)
6227 if (reload_in_progress)
6228 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6229 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6230 UNSPEC_GOTOFF);
6231 new = gen_rtx_PLUS (Pmode, new, op1);
6232 new = gen_rtx_CONST (Pmode, new);
6233 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6235 if (reg != 0)
6237 emit_move_insn (reg, new);
6238 new = reg;
6241 else
6243 if (INTVAL (op1) < -16*1024*1024
6244 || INTVAL (op1) >= 16*1024*1024)
6246 if (!x86_64_immediate_operand (op1, Pmode))
6247 op1 = force_reg (Pmode, op1);
6248 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6252 else
6254 base = legitimize_pic_address (XEXP (addr, 0), reg);
6255 new = legitimize_pic_address (XEXP (addr, 1),
6256 base == reg ? NULL_RTX : reg);
6258 if (GET_CODE (new) == CONST_INT)
6259 new = plus_constant (base, INTVAL (new));
6260 else
6262 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6264 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6265 new = XEXP (new, 1);
6267 new = gen_rtx_PLUS (Pmode, base, new);
6272 return new;
6275 /* Load the thread pointer. If TO_REG is true, force it into a register. */
6277 static rtx
6278 get_thread_pointer (int to_reg)
6280 rtx tp, reg, insn;
6282 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6283 if (!to_reg)
6284 return tp;
6286 reg = gen_reg_rtx (Pmode);
6287 insn = gen_rtx_SET (VOIDmode, reg, tp);
6288 insn = emit_insn (insn);
6290 return reg;
6293 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
6294 false if we expect this to be used for a memory address and true if
6295 we expect to load the address into a register. */
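/* Roughly, the access sequences generated per TLS model are:
   global dynamic: a call to the tls_global_dynamic pattern (via
     __tls_get_addr) whose result is the address;
   local dynamic:  one such call for the module base plus an x@DTPOFF offset;
   initial exec:   thread pointer plus a value loaded from the GOT
     (x@GOTTPOFF, x@GOTNTPOFF or x@INDNTPOFF);
   local exec:     thread pointer plus a link-time constant (x@NTPOFF,
     or x@TPOFF subtracted for the non-GNU TLS variant).  */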
6297 static rtx
6298 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6300 rtx dest, base, off, pic;
6301 int type;
6303 switch (model)
6305 case TLS_MODEL_GLOBAL_DYNAMIC:
6306 dest = gen_reg_rtx (Pmode);
6307 if (TARGET_64BIT)
6309 rtx rax = gen_rtx_REG (Pmode, 0), insns;
6311 start_sequence ();
6312 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6313 insns = get_insns ();
6314 end_sequence ();
6316 emit_libcall_block (insns, dest, rax, x);
6318 else
6319 emit_insn (gen_tls_global_dynamic_32 (dest, x));
6320 break;
6322 case TLS_MODEL_LOCAL_DYNAMIC:
6323 base = gen_reg_rtx (Pmode);
6324 if (TARGET_64BIT)
6326 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6328 start_sequence ();
6329 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6330 insns = get_insns ();
6331 end_sequence ();
6333 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6334 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6335 emit_libcall_block (insns, base, rax, note);
6337 else
6338 emit_insn (gen_tls_local_dynamic_base_32 (base));
6340 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6341 off = gen_rtx_CONST (Pmode, off);
6343 return gen_rtx_PLUS (Pmode, base, off);
6345 case TLS_MODEL_INITIAL_EXEC:
6346 if (TARGET_64BIT)
6348 pic = NULL;
6349 type = UNSPEC_GOTNTPOFF;
6351 else if (flag_pic)
6353 if (reload_in_progress)
6354 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6355 pic = pic_offset_table_rtx;
6356 type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6358 else if (!TARGET_GNU_TLS)
6360 pic = gen_reg_rtx (Pmode);
6361 emit_insn (gen_set_got (pic));
6362 type = UNSPEC_GOTTPOFF;
6364 else
6366 pic = NULL;
6367 type = UNSPEC_INDNTPOFF;
6370 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6371 off = gen_rtx_CONST (Pmode, off);
6372 if (pic)
6373 off = gen_rtx_PLUS (Pmode, pic, off);
6374 off = gen_const_mem (Pmode, off);
6375 set_mem_alias_set (off, ix86_GOT_alias_set ());
6377 if (TARGET_64BIT || TARGET_GNU_TLS)
6379 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6380 off = force_reg (Pmode, off);
6381 return gen_rtx_PLUS (Pmode, base, off);
6383 else
6385 base = get_thread_pointer (true);
6386 dest = gen_reg_rtx (Pmode);
6387 emit_insn (gen_subsi3 (dest, base, off));
6389 break;
6391 case TLS_MODEL_LOCAL_EXEC:
6392 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6393 (TARGET_64BIT || TARGET_GNU_TLS)
6394 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6395 off = gen_rtx_CONST (Pmode, off);
6397 if (TARGET_64BIT || TARGET_GNU_TLS)
6399 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6400 return gen_rtx_PLUS (Pmode, base, off);
6402 else
6404 base = get_thread_pointer (true);
6405 dest = gen_reg_rtx (Pmode);
6406 emit_insn (gen_subsi3 (dest, base, off));
6408 break;
6410 default:
6411 gcc_unreachable ();
6414 return dest;
6417 /* Try machine-dependent ways of modifying an illegitimate address
6418 to be legitimate. If we find one, return the new, valid address.
6419 This macro is used in only one place: `memory_address' in explow.c.
6421 OLDX is the address as it was before break_out_memory_refs was called.
6422 In some cases it is useful to look at this to decide what needs to be done.
6424 MODE and WIN are passed so that this macro can use
6425 GO_IF_LEGITIMATE_ADDRESS.
6427 It is always safe for this macro to do nothing. It exists to recognize
6428 opportunities to optimize the output.
6430 For the 80386, we handle X+REG by loading X into a register R and
6431 using R+REG. R will go in a general reg and indexing will be used.
6432 However, if REG is a broken-out memory address or multiplication,
6433 nothing needs to be done because REG can certainly go in a general reg.
6435 When -fpic is used, special handling is needed for symbolic references.
6436 See comments by legitimize_pic_address in i386.c for details. */
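/* As an illustration of the canonicalizations below, a shift-based address
   such as (plus (reg B) (ashift (reg A) (const_int 2))) is rewritten to the
   multiply form (plus (mult (reg A) (const_int 4)) (reg B)), which
   GO_IF_LEGITIMATE_ADDRESS accepts as the operand (B,A,4).  */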
6439 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6441 int changed = 0;
6442 unsigned log;
6444 if (TARGET_DEBUG_ADDR)
6446 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6447 GET_MODE_NAME (mode));
6448 debug_rtx (x);
6451 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6452 if (log)
6453 return legitimize_tls_address (x, log, false);
6454 if (GET_CODE (x) == CONST
6455 && GET_CODE (XEXP (x, 0)) == PLUS
6456 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6457 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6459 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6460 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6463 if (flag_pic && SYMBOLIC_CONST (x))
6464 return legitimize_pic_address (x, 0);
6466 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6467 if (GET_CODE (x) == ASHIFT
6468 && GET_CODE (XEXP (x, 1)) == CONST_INT
6469 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6471 changed = 1;
6472 log = INTVAL (XEXP (x, 1));
6473 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6474 GEN_INT (1 << log));
6477 if (GET_CODE (x) == PLUS)
6479 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
6481 if (GET_CODE (XEXP (x, 0)) == ASHIFT
6482 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6483 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6485 changed = 1;
6486 log = INTVAL (XEXP (XEXP (x, 0), 1));
6487 XEXP (x, 0) = gen_rtx_MULT (Pmode,
6488 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6489 GEN_INT (1 << log));
6492 if (GET_CODE (XEXP (x, 1)) == ASHIFT
6493 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6494 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6496 changed = 1;
6497 log = INTVAL (XEXP (XEXP (x, 1), 1));
6498 XEXP (x, 1) = gen_rtx_MULT (Pmode,
6499 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6500 GEN_INT (1 << log));
6503 /* Put multiply first if it isn't already. */
6504 if (GET_CODE (XEXP (x, 1)) == MULT)
6506 rtx tmp = XEXP (x, 0);
6507 XEXP (x, 0) = XEXP (x, 1);
6508 XEXP (x, 1) = tmp;
6509 changed = 1;
6512 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6513 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
6514 created by virtual register instantiation, register elimination, and
6515 similar optimizations. */
6516 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6518 changed = 1;
6519 x = gen_rtx_PLUS (Pmode,
6520 gen_rtx_PLUS (Pmode, XEXP (x, 0),
6521 XEXP (XEXP (x, 1), 0)),
6522 XEXP (XEXP (x, 1), 1));
6525 /* Canonicalize
6526 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6527 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
6528 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6530 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6531 && CONSTANT_P (XEXP (x, 1)))
6533 rtx constant;
6534 rtx other = NULL_RTX;
6536 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6538 constant = XEXP (x, 1);
6539 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6541 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6543 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6544 other = XEXP (x, 1);
6546 else
6547 constant = 0;
6549 if (constant)
6551 changed = 1;
6552 x = gen_rtx_PLUS (Pmode,
6553 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6554 XEXP (XEXP (XEXP (x, 0), 1), 0)),
6555 plus_constant (other, INTVAL (constant)));
6559 if (changed && legitimate_address_p (mode, x, FALSE))
6560 return x;
6562 if (GET_CODE (XEXP (x, 0)) == MULT)
6564 changed = 1;
6565 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6568 if (GET_CODE (XEXP (x, 1)) == MULT)
6570 changed = 1;
6571 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6574 if (changed
6575 && GET_CODE (XEXP (x, 1)) == REG
6576 && GET_CODE (XEXP (x, 0)) == REG)
6577 return x;
6579 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6581 changed = 1;
6582 x = legitimize_pic_address (x, 0);
6585 if (changed && legitimate_address_p (mode, x, FALSE))
6586 return x;
6588 if (GET_CODE (XEXP (x, 0)) == REG)
6590 rtx temp = gen_reg_rtx (Pmode);
6591 rtx val = force_operand (XEXP (x, 1), temp);
6592 if (val != temp)
6593 emit_move_insn (temp, val);
6595 XEXP (x, 1) = temp;
6596 return x;
6599 else if (GET_CODE (XEXP (x, 1)) == REG)
6601 rtx temp = gen_reg_rtx (Pmode);
6602 rtx val = force_operand (XEXP (x, 0), temp);
6603 if (val != temp)
6604 emit_move_insn (temp, val);
6606 XEXP (x, 0) = temp;
6607 return x;
6611 return x;
6614 /* Print an integer constant expression in assembler syntax. Addition
6615 and subtraction are the only arithmetic that may appear in these
6616 expressions. FILE is the stdio stream to write to, X is the rtx, and
6617 CODE is the operand print code from the output string. */
6619 static void
6620 output_pic_addr_const (FILE *file, rtx x, int code)
6622 char buf[256];
6624 switch (GET_CODE (x))
6626 case PC:
6627 gcc_assert (flag_pic);
6628 putc ('.', file);
6629 break;
6631 case SYMBOL_REF:
6632 output_addr_const (file, x);
6633 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6634 fputs ("@PLT", file);
6635 break;
6637 case LABEL_REF:
6638 x = XEXP (x, 0);
6639 /* FALLTHRU */
6640 case CODE_LABEL:
6641 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6642 assemble_name (asm_out_file, buf);
6643 break;
6645 case CONST_INT:
6646 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6647 break;
6649 case CONST:
6650 /* This used to output parentheses around the expression,
6651 but that does not work on the 386 (either ATT or BSD assembler). */
6652 output_pic_addr_const (file, XEXP (x, 0), code);
6653 break;
6655 case CONST_DOUBLE:
6656 if (GET_MODE (x) == VOIDmode)
6658 /* We can use %d if the number is <32 bits and positive. */
6659 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6660 fprintf (file, "0x%lx%08lx",
6661 (unsigned long) CONST_DOUBLE_HIGH (x),
6662 (unsigned long) CONST_DOUBLE_LOW (x));
6663 else
6664 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6666 else
6667 /* We can't handle floating point constants;
6668 PRINT_OPERAND must handle them. */
6669 output_operand_lossage ("floating constant misused");
6670 break;
6672 case PLUS:
6673 /* Some assemblers need integer constants to appear first. */
6674 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6676 output_pic_addr_const (file, XEXP (x, 0), code);
6677 putc ('+', file);
6678 output_pic_addr_const (file, XEXP (x, 1), code);
6680 else
6682 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6683 output_pic_addr_const (file, XEXP (x, 1), code);
6684 putc ('+', file);
6685 output_pic_addr_const (file, XEXP (x, 0), code);
6687 break;
6689 case MINUS:
6690 if (!TARGET_MACHO)
6691 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6692 output_pic_addr_const (file, XEXP (x, 0), code);
6693 putc ('-', file);
6694 output_pic_addr_const (file, XEXP (x, 1), code);
6695 if (!TARGET_MACHO)
6696 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6697 break;
6699 case UNSPEC:
6700 gcc_assert (XVECLEN (x, 0) == 1);
6701 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6702 switch (XINT (x, 1))
6704 case UNSPEC_GOT:
6705 fputs ("@GOT", file);
6706 break;
6707 case UNSPEC_GOTOFF:
6708 fputs ("@GOTOFF", file);
6709 break;
6710 case UNSPEC_GOTPCREL:
6711 fputs ("@GOTPCREL(%rip)", file);
6712 break;
6713 case UNSPEC_GOTTPOFF:
6714 /* FIXME: This might be @TPOFF in Sun ld too. */
6715 fputs ("@GOTTPOFF", file);
6716 break;
6717 case UNSPEC_TPOFF:
6718 fputs ("@TPOFF", file);
6719 break;
6720 case UNSPEC_NTPOFF:
6721 if (TARGET_64BIT)
6722 fputs ("@TPOFF", file);
6723 else
6724 fputs ("@NTPOFF", file);
6725 break;
6726 case UNSPEC_DTPOFF:
6727 fputs ("@DTPOFF", file);
6728 break;
6729 case UNSPEC_GOTNTPOFF:
6730 if (TARGET_64BIT)
6731 fputs ("@GOTTPOFF(%rip)", file);
6732 else
6733 fputs ("@GOTNTPOFF", file);
6734 break;
6735 case UNSPEC_INDNTPOFF:
6736 fputs ("@INDNTPOFF", file);
6737 break;
6738 default:
6739 output_operand_lossage ("invalid UNSPEC as operand");
6740 break;
6742 break;
6744 default:
6745 output_operand_lossage ("invalid expression as operand");
6749 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6750 We need to emit DTP-relative relocations. */
6752 static void
6753 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6755 fputs (ASM_LONG, file);
6756 output_addr_const (file, x);
6757 fputs ("@DTPOFF", file);
6758 switch (size)
6760 case 4:
6761 break;
6762 case 8:
6763 fputs (", 0", file);
6764 break;
6765 default:
6766 gcc_unreachable ();
6770 /* In the name of slightly smaller debug output, and to cater to
6771 general assembler lossage, recognize PIC+GOTOFF and turn it back
6772 into a direct symbol reference. */
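/* For example, when the operand is not a MEM,
   (plus (reg %ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into the plain symbol_ref for "foo".  */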
6774 static rtx
6775 ix86_delegitimize_address (rtx orig_x)
6777 rtx x = orig_x, y;
6779 if (GET_CODE (x) == MEM)
6780 x = XEXP (x, 0);
6782 if (TARGET_64BIT)
6784 if (GET_CODE (x) != CONST
6785 || GET_CODE (XEXP (x, 0)) != UNSPEC
6786 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6787 || GET_CODE (orig_x) != MEM)
6788 return orig_x;
6789 return XVECEXP (XEXP (x, 0), 0, 0);
6792 if (GET_CODE (x) != PLUS
6793 || GET_CODE (XEXP (x, 1)) != CONST)
6794 return orig_x;
6796 if (GET_CODE (XEXP (x, 0)) == REG
6797 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6798 /* %ebx + GOT/GOTOFF */
6799 y = NULL;
6800 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6802 /* %ebx + %reg * scale + GOT/GOTOFF */
6803 y = XEXP (x, 0);
6804 if (GET_CODE (XEXP (y, 0)) == REG
6805 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6806 y = XEXP (y, 1);
6807 else if (GET_CODE (XEXP (y, 1)) == REG
6808 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6809 y = XEXP (y, 0);
6810 else
6811 return orig_x;
6812 if (GET_CODE (y) != REG
6813 && GET_CODE (y) != MULT
6814 && GET_CODE (y) != ASHIFT)
6815 return orig_x;
6817 else
6818 return orig_x;
6820 x = XEXP (XEXP (x, 1), 0);
6821 if (GET_CODE (x) == UNSPEC
6822 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6823 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6825 if (y)
6826 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6827 return XVECEXP (x, 0, 0);
6830 if (GET_CODE (x) == PLUS
6831 && GET_CODE (XEXP (x, 0)) == UNSPEC
6832 && GET_CODE (XEXP (x, 1)) == CONST_INT
6833 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6834 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6835 && GET_CODE (orig_x) != MEM)))
6837 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6838 if (y)
6839 return gen_rtx_PLUS (Pmode, y, x);
6840 return x;
6843 return orig_x;
6846 static void
6847 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6848 int fp, FILE *file)
6850 const char *suffix;
6852 if (mode == CCFPmode || mode == CCFPUmode)
6854 enum rtx_code second_code, bypass_code;
6855 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6856 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6857 code = ix86_fp_compare_code_to_integer (code);
6858 mode = CCmode;
6860 if (reverse)
6861 code = reverse_condition (code);
6863 switch (code)
6865 case EQ:
6866 suffix = "e";
6867 break;
6868 case NE:
6869 suffix = "ne";
6870 break;
6871 case GT:
6872 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6873 suffix = "g";
6874 break;
6875 case GTU:
6876 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6877 Those same assemblers have the same but opposite lossage on cmov. */
6878 gcc_assert (mode == CCmode);
6879 suffix = fp ? "nbe" : "a";
6880 break;
6881 case LT:
6882 switch (mode)
6884 case CCNOmode:
6885 case CCGOCmode:
6886 suffix = "s";
6887 break;
6889 case CCmode:
6890 case CCGCmode:
6891 suffix = "l";
6892 break;
6894 default:
6895 gcc_unreachable ();
6897 break;
6898 case LTU:
6899 gcc_assert (mode == CCmode);
6900 suffix = "b";
6901 break;
6902 case GE:
6903 switch (mode)
6905 case CCNOmode:
6906 case CCGOCmode:
6907 suffix = "ns";
6908 break;
6910 case CCmode:
6911 case CCGCmode:
6912 suffix = "ge";
6913 break;
6915 default:
6916 gcc_unreachable ();
6918 break;
6919 case GEU:
6920 /* ??? As above. */
6921 gcc_assert (mode == CCmode);
6922 suffix = fp ? "nb" : "ae";
6923 break;
6924 case LE:
6925 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6926 suffix = "le";
6927 break;
6928 case LEU:
6929 gcc_assert (mode == CCmode);
6930 suffix = "be";
6931 break;
6932 case UNORDERED:
6933 suffix = fp ? "u" : "p";
6934 break;
6935 case ORDERED:
6936 suffix = fp ? "nu" : "np";
6937 break;
6938 default:
6939 gcc_unreachable ();
6941 fputs (suffix, file);
6944 /* Print the name of register X to FILE based on its machine mode and number.
6945 If CODE is 'w', pretend the mode is HImode.
6946 If CODE is 'b', pretend the mode is QImode.
6947 If CODE is 'k', pretend the mode is SImode.
6948 If CODE is 'q', pretend the mode is DImode.
6949 If CODE is 'h', pretend the reg is the 'high' byte register.
6950 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
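/* For example, given the register %eax, code 'w' prints "%ax", 'b' prints
   "%al", 'h' prints "%ah", and on a 64bit target 'q' prints "%rax"
   (assuming the ATT dialect, where the '%' prefix is emitted).  */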
6952 void
6953 print_reg (rtx x, int code, FILE *file)
6955 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6956 && REGNO (x) != FRAME_POINTER_REGNUM
6957 && REGNO (x) != FLAGS_REG
6958 && REGNO (x) != FPSR_REG);
6960 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6961 putc ('%', file);
6963 if (code == 'w' || MMX_REG_P (x))
6964 code = 2;
6965 else if (code == 'b')
6966 code = 1;
6967 else if (code == 'k')
6968 code = 4;
6969 else if (code == 'q')
6970 code = 8;
6971 else if (code == 'y')
6972 code = 3;
6973 else if (code == 'h')
6974 code = 0;
6975 else
6976 code = GET_MODE_SIZE (GET_MODE (x));
6978 /* Irritatingly, AMD extended registers use a different naming convention
6979 from the normal registers. */
6980 if (REX_INT_REG_P (x))
6982 gcc_assert (TARGET_64BIT);
6983 switch (code)
6985 case 0:
6986 error ("extended registers have no high halves");
6987 break;
6988 case 1:
6989 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6990 break;
6991 case 2:
6992 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6993 break;
6994 case 4:
6995 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6996 break;
6997 case 8:
6998 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6999 break;
7000 default:
7001 error ("unsupported operand size for extended register");
7002 break;
7004 return;
7006 switch (code)
7008 case 3:
7009 if (STACK_TOP_P (x))
7011 fputs ("st(0)", file);
7012 break;
7014 /* FALLTHRU */
7015 case 8:
7016 case 4:
7017 case 12:
7018 if (! ANY_FP_REG_P (x))
7019 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7020 /* FALLTHRU */
7021 case 16:
7022 case 2:
7023 normal:
7024 fputs (hi_reg_name[REGNO (x)], file);
7025 break;
7026 case 1:
7027 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7028 goto normal;
7029 fputs (qi_reg_name[REGNO (x)], file);
7030 break;
7031 case 0:
7032 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7033 goto normal;
7034 fputs (qi_high_reg_name[REGNO (x)], file);
7035 break;
7036 default:
7037 gcc_unreachable ();
7041 /* Locate some local-dynamic symbol still in use by this function
7042 so that we can print its name in some tls_local_dynamic_base
7043 pattern. */
7045 static const char *
7046 get_some_local_dynamic_name (void)
7048 rtx insn;
7050 if (cfun->machine->some_ld_name)
7051 return cfun->machine->some_ld_name;
7053 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7054 if (INSN_P (insn)
7055 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7056 return cfun->machine->some_ld_name;
7058 gcc_unreachable ();
7061 static int
7062 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7064 rtx x = *px;
7066 if (GET_CODE (x) == SYMBOL_REF
7067 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7069 cfun->machine->some_ld_name = XSTR (x, 0);
7070 return 1;
7073 return 0;
7076 /* Meaning of CODE:
7077 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7078 C -- print opcode suffix for set/cmov insn.
7079 c -- like C, but print reversed condition
7080 F,f -- likewise, but for floating-point.
7081 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7082 otherwise nothing
7083 R -- print the prefix for register names.
7084 z -- print the opcode suffix for the size of the current operand.
7085 * -- print a star (in certain assembler syntax)
7086 A -- print an absolute memory reference.
7087 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7088 s -- print a shift double count, followed by the assembler's argument
7089 delimiter.
7090 b -- print the QImode name of the register for the indicated operand.
7091 %b0 would print %al if operands[0] is reg 0.
7092 w -- likewise, print the HImode name of the register.
7093 k -- likewise, print the SImode name of the register.
7094 q -- likewise, print the DImode name of the register.
7095 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7096 y -- print "st(0)" instead of "st" as a register.
7097 D -- print condition for SSE cmp instruction.
7098 P -- if PIC, print an @PLT suffix.
7099 X -- don't print any sort of PIC '@' suffix for a symbol.
7100 & -- print some in-use local-dynamic symbol name.
7101 H -- print a memory address offset by 8; used for sse high-parts
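   For example, if operands[0] is (reg %eax), "%k0" prints "%eax",
   "%w0" prints "%ax" and "%b0" prints "%al"; "%z0" applied to an SFmode
   operand prints the 387 suffix "s".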
7104 void
7105 print_operand (FILE *file, rtx x, int code)
7107 if (code)
7109 switch (code)
7111 case '*':
7112 if (ASSEMBLER_DIALECT == ASM_ATT)
7113 putc ('*', file);
7114 return;
7116 case '&':
7117 assemble_name (file, get_some_local_dynamic_name ());
7118 return;
7120 case 'A':
7121 switch (ASSEMBLER_DIALECT)
7123 case ASM_ATT:
7124 putc ('*', file);
7125 break;
7127 case ASM_INTEL:
7128 /* Intel syntax. For absolute addresses, registers should not
7129 be surrounded by brackets. */
7130 if (GET_CODE (x) != REG)
7132 putc ('[', file);
7133 PRINT_OPERAND (file, x, 0);
7134 putc (']', file);
7135 return;
7137 break;
7139 default:
7140 gcc_unreachable ();
7143 PRINT_OPERAND (file, x, 0);
7144 return;
7147 case 'L':
7148 if (ASSEMBLER_DIALECT == ASM_ATT)
7149 putc ('l', file);
7150 return;
7152 case 'W':
7153 if (ASSEMBLER_DIALECT == ASM_ATT)
7154 putc ('w', file);
7155 return;
7157 case 'B':
7158 if (ASSEMBLER_DIALECT == ASM_ATT)
7159 putc ('b', file);
7160 return;
7162 case 'Q':
7163 if (ASSEMBLER_DIALECT == ASM_ATT)
7164 putc ('l', file);
7165 return;
7167 case 'S':
7168 if (ASSEMBLER_DIALECT == ASM_ATT)
7169 putc ('s', file);
7170 return;
7172 case 'T':
7173 if (ASSEMBLER_DIALECT == ASM_ATT)
7174 putc ('t', file);
7175 return;
7177 case 'z':
7178 /* 387 opcodes don't get size suffixes if the operands are
7179 registers. */
7180 if (STACK_REG_P (x))
7181 return;
7183 /* Likewise if using Intel opcodes. */
7184 if (ASSEMBLER_DIALECT == ASM_INTEL)
7185 return;
7187 /* Derive the opcode suffix from the size of the operand. */
7188 switch (GET_MODE_SIZE (GET_MODE (x)))
7190 case 2:
7191 #ifdef HAVE_GAS_FILDS_FISTS
7192 putc ('s', file);
7193 #endif
7194 return;
7196 case 4:
7197 if (GET_MODE (x) == SFmode)
7199 putc ('s', file);
7200 return;
7202 else
7203 putc ('l', file);
7204 return;
7206 case 12:
7207 case 16:
7208 putc ('t', file);
7209 return;
7211 case 8:
7212 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7214 #ifdef GAS_MNEMONICS
7215 putc ('q', file);
7216 #else
7217 putc ('l', file);
7218 putc ('l', file);
7219 #endif
7221 else
7222 putc ('l', file);
7223 return;
7225 default:
7226 gcc_unreachable ();
7229 case 'b':
7230 case 'w':
7231 case 'k':
7232 case 'q':
7233 case 'h':
7234 case 'y':
7235 case 'X':
7236 case 'P':
7237 break;
7239 case 's':
7240 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7242 PRINT_OPERAND (file, x, 0);
7243 putc (',', file);
7245 return;
7247 case 'D':
7248 /* A little bit of brain damage here. The SSE compare instructions
7249 use completely different names for the comparisons than the
7250 fp conditional moves do. */
7251 switch (GET_CODE (x))
7253 case EQ:
7254 case UNEQ:
7255 fputs ("eq", file);
7256 break;
7257 case LT:
7258 case UNLT:
7259 fputs ("lt", file);
7260 break;
7261 case LE:
7262 case UNLE:
7263 fputs ("le", file);
7264 break;
7265 case UNORDERED:
7266 fputs ("unord", file);
7267 break;
7268 case NE:
7269 case LTGT:
7270 fputs ("neq", file);
7271 break;
7272 case UNGE:
7273 case GE:
7274 fputs ("nlt", file);
7275 break;
7276 case UNGT:
7277 case GT:
7278 fputs ("nle", file);
7279 break;
7280 case ORDERED:
7281 fputs ("ord", file);
7282 break;
7283 default:
7284 gcc_unreachable ();
7286 return;
7287 case 'O':
7288 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7289 if (ASSEMBLER_DIALECT == ASM_ATT)
7291 switch (GET_MODE (x))
7293 case HImode: putc ('w', file); break;
7294 case SImode:
7295 case SFmode: putc ('l', file); break;
7296 case DImode:
7297 case DFmode: putc ('q', file); break;
7298 default: gcc_unreachable ();
7300 putc ('.', file);
7302 #endif
7303 return;
7304 case 'C':
7305 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7306 return;
7307 case 'F':
7308 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7309 if (ASSEMBLER_DIALECT == ASM_ATT)
7310 putc ('.', file);
7311 #endif
7312 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7313 return;
7315 /* Like above, but reverse condition */
7316 case 'c':
7317 /* Check to see if argument to %c is really a constant
7318 and not a condition code which needs to be reversed. */
7319 if (!COMPARISON_P (x))
7321 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7322 return;
7324 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7325 return;
7326 case 'f':
7327 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7328 if (ASSEMBLER_DIALECT == ASM_ATT)
7329 putc ('.', file);
7330 #endif
7331 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7332 return;
7334 case 'H':
7335 /* It doesn't actually matter what mode we use here, as we're
7336 only going to use this for printing. */
7337 x = adjust_address_nv (x, DImode, 8);
7338 break;
7340 case '+':
7342 rtx x;
7344 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7345 return;
7347 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7348 if (x)
7350 int pred_val = INTVAL (XEXP (x, 0));
7352 if (pred_val < REG_BR_PROB_BASE * 45 / 100
7353 || pred_val > REG_BR_PROB_BASE * 55 / 100)
7355 int taken = pred_val > REG_BR_PROB_BASE / 2;
7356 int cputaken = final_forward_branch_p (current_output_insn) == 0;
7358 /* Emit hints only in the case the default branch prediction
7359 heuristics would fail. */
7360 if (taken != cputaken)
7362 /* We use 3e (DS) prefix for taken branches and
7363 2e (CS) prefix for not taken branches. */
7364 if (taken)
7365 fputs ("ds ; ", file);
7366 else
7367 fputs ("cs ; ", file);
7371 return;
7373 default:
7374 output_operand_lossage ("invalid operand code '%c'", code);
7378 if (GET_CODE (x) == REG)
7379 print_reg (x, code, file);
7381 else if (GET_CODE (x) == MEM)
7383 /* No `byte ptr' prefix for call instructions. */
7384 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7386 const char * size;
7387 switch (GET_MODE_SIZE (GET_MODE (x)))
7389 case 1: size = "BYTE"; break;
7390 case 2: size = "WORD"; break;
7391 case 4: size = "DWORD"; break;
7392 case 8: size = "QWORD"; break;
7393 case 12: size = "XWORD"; break;
7394 case 16: size = "XMMWORD"; break;
7395 default:
7396 gcc_unreachable ();
7399 /* Check for explicit size override (codes 'b', 'w' and 'k') */
7400 if (code == 'b')
7401 size = "BYTE";
7402 else if (code == 'w')
7403 size = "WORD";
7404 else if (code == 'k')
7405 size = "DWORD";
7407 fputs (size, file);
7408 fputs (" PTR ", file);
7411 x = XEXP (x, 0);
7412 /* Avoid (%rip) for call operands. */
7413 if (CONSTANT_ADDRESS_P (x) && code == 'P'
7414 && GET_CODE (x) != CONST_INT)
7415 output_addr_const (file, x);
7416 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7417 output_operand_lossage ("invalid constraints for operand");
7418 else
7419 output_address (x);
7422 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7424 REAL_VALUE_TYPE r;
7425 long l;
7427 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7428 REAL_VALUE_TO_TARGET_SINGLE (r, l);
7430 if (ASSEMBLER_DIALECT == ASM_ATT)
7431 putc ('$', file);
7432 fprintf (file, "0x%08lx", l);
7435 /* These float cases don't actually occur as immediate operands. */
7436 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7438 char dstr[30];
7440 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7441 fprintf (file, "%s", dstr);
7444 else if (GET_CODE (x) == CONST_DOUBLE
7445 && GET_MODE (x) == XFmode)
7447 char dstr[30];
7449 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7450 fprintf (file, "%s", dstr);
7453 else
7455 /* We have patterns that allow zero sets of memory, for instance.
7456 In 64-bit mode, we should probably support all 8-byte vectors,
7457 since we can in fact encode that into an immediate. */
7458 if (GET_CODE (x) == CONST_VECTOR)
7460 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7461 x = const0_rtx;
7464 if (code != 'P')
7466 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7468 if (ASSEMBLER_DIALECT == ASM_ATT)
7469 putc ('$', file);
7471 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7472 || GET_CODE (x) == LABEL_REF)
7474 if (ASSEMBLER_DIALECT == ASM_ATT)
7475 putc ('$', file);
7476 else
7477 fputs ("OFFSET FLAT:", file);
7480 if (GET_CODE (x) == CONST_INT)
7481 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7482 else if (flag_pic)
7483 output_pic_addr_const (file, x, code);
7484 else
7485 output_addr_const (file, x);
7489 /* Print a memory operand whose address is ADDR. */
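/* For example, an address with base %ebx, index %eax, scale 4 and
   displacement 8 is printed as "8(%ebx,%eax,4)" in AT&T syntax and as
   "[ebx+8+eax*4]" in Intel syntax.  */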
7491 void
7492 print_operand_address (FILE *file, rtx addr)
7494 struct ix86_address parts;
7495 rtx base, index, disp;
7496 int scale;
7497 int ok = ix86_decompose_address (addr, &parts);
7499 gcc_assert (ok);
7501 base = parts.base;
7502 index = parts.index;
7503 disp = parts.disp;
7504 scale = parts.scale;
7506 switch (parts.seg)
7508 case SEG_DEFAULT:
7509 break;
7510 case SEG_FS:
7511 case SEG_GS:
7512 if (USER_LABEL_PREFIX[0] == 0)
7513 putc ('%', file);
7514 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7515 break;
7516 default:
7517 gcc_unreachable ();
7520 if (!base && !index)
7522 /* A displacement-only address requires special attention. */
7524 if (GET_CODE (disp) == CONST_INT)
7526 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7528 if (USER_LABEL_PREFIX[0] == 0)
7529 putc ('%', file);
7530 fputs ("ds:", file);
7532 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7534 else if (flag_pic)
7535 output_pic_addr_const (file, disp, 0);
7536 else
7537 output_addr_const (file, disp);
7539 /* Use the one byte shorter RIP-relative addressing for 64bit mode. */
7540 if (TARGET_64BIT)
7542 if (GET_CODE (disp) == CONST
7543 && GET_CODE (XEXP (disp, 0)) == PLUS
7544 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7545 disp = XEXP (XEXP (disp, 0), 0);
7546 if (GET_CODE (disp) == LABEL_REF
7547 || (GET_CODE (disp) == SYMBOL_REF
7548 && SYMBOL_REF_TLS_MODEL (disp) == 0))
7549 fputs ("(%rip)", file);
7552 else
7554 if (ASSEMBLER_DIALECT == ASM_ATT)
7556 if (disp)
7558 if (flag_pic)
7559 output_pic_addr_const (file, disp, 0);
7560 else if (GET_CODE (disp) == LABEL_REF)
7561 output_asm_label (disp);
7562 else
7563 output_addr_const (file, disp);
7566 putc ('(', file);
7567 if (base)
7568 print_reg (base, 0, file);
7569 if (index)
7571 putc (',', file);
7572 print_reg (index, 0, file);
7573 if (scale != 1)
7574 fprintf (file, ",%d", scale);
7576 putc (')', file);
7578 else
7580 rtx offset = NULL_RTX;
7582 if (disp)
7584 /* Pull out the offset of a symbol; print any symbol itself. */
7585 if (GET_CODE (disp) == CONST
7586 && GET_CODE (XEXP (disp, 0)) == PLUS
7587 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7589 offset = XEXP (XEXP (disp, 0), 1);
7590 disp = gen_rtx_CONST (VOIDmode,
7591 XEXP (XEXP (disp, 0), 0));
7594 if (flag_pic)
7595 output_pic_addr_const (file, disp, 0);
7596 else if (GET_CODE (disp) == LABEL_REF)
7597 output_asm_label (disp);
7598 else if (GET_CODE (disp) == CONST_INT)
7599 offset = disp;
7600 else
7601 output_addr_const (file, disp);
7604 putc ('[', file);
7605 if (base)
7607 print_reg (base, 0, file);
7608 if (offset)
7610 if (INTVAL (offset) >= 0)
7611 putc ('+', file);
7612 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7615 else if (offset)
7616 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7617 else
7618 putc ('0', file);
7620 if (index)
7622 putc ('+', file);
7623 print_reg (index, 0, file);
7624 if (scale != 1)
7625 fprintf (file, "*%d", scale);
7627 putc (']', file);
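/* Illustrative note (not part of the original source; register choices are
   hypothetical): for an address with base %rbp, index %rax, scale 4 and
   displacement -8, the code above prints "-8(%rbp,%rax,4)" in the AT&T
   dialect and "[rbp-8+rax*4]" in the Intel dialect, with an "fs:" or "gs:"
   override printed first when parts.seg selects one.  */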
7632 bool
7633 output_addr_const_extra (FILE *file, rtx x)
7635 rtx op;
7637 if (GET_CODE (x) != UNSPEC)
7638 return false;
7640 op = XVECEXP (x, 0, 0);
7641 switch (XINT (x, 1))
7643 case UNSPEC_GOTTPOFF:
7644 output_addr_const (file, op);
7645 /* FIXME: This might be @TPOFF in Sun ld. */
7646 fputs ("@GOTTPOFF", file);
7647 break;
7648 case UNSPEC_TPOFF:
7649 output_addr_const (file, op);
7650 fputs ("@TPOFF", file);
7651 break;
7652 case UNSPEC_NTPOFF:
7653 output_addr_const (file, op);
7654 if (TARGET_64BIT)
7655 fputs ("@TPOFF", file);
7656 else
7657 fputs ("@NTPOFF", file);
7658 break;
7659 case UNSPEC_DTPOFF:
7660 output_addr_const (file, op);
7661 fputs ("@DTPOFF", file);
7662 break;
7663 case UNSPEC_GOTNTPOFF:
7664 output_addr_const (file, op);
7665 if (TARGET_64BIT)
7666 fputs ("@GOTTPOFF(%rip)", file);
7667 else
7668 fputs ("@GOTNTPOFF", file);
7669 break;
7670 case UNSPEC_INDNTPOFF:
7671 output_addr_const (file, op);
7672 fputs ("@INDNTPOFF", file);
7673 break;
7675 default:
7676 return false;
7679 return true;
7682 /* Split one or more DImode RTL references into pairs of SImode
7683 references. The RTL can be REG, offsettable MEM, integer constant, or
7684 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
7685 split and "num" is its length. lo_half and hi_half are output arrays
7686 that parallel "operands". */
7688 void
7689 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7691 while (num--)
7693 rtx op = operands[num];
7695 /* simplify_subreg refuses to split volatile memory addresses,
7696 but we still have to handle them. */
7697 if (GET_CODE (op) == MEM)
7699 lo_half[num] = adjust_address (op, SImode, 0);
7700 hi_half[num] = adjust_address (op, SImode, 4);
7702 else
7704 lo_half[num] = simplify_gen_subreg (SImode, op,
7705 GET_MODE (op) == VOIDmode
7706 ? DImode : GET_MODE (op), 0);
7707 hi_half[num] = simplify_gen_subreg (SImode, op,
7708 GET_MODE (op) == VOIDmode
7709 ? DImode : GET_MODE (op), 4);
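/* A minimal usage sketch (not part of the original source; the operand value
   is hypothetical): splitting a 64-bit immediate yields its low and high
   32-bit halves as SImode rtxes.  */
#if 0
  rtx op = GEN_INT ((HOST_WIDE_INT) 0x1122334455667788LL);
  rtx lo, hi;
  split_di (&op, 1, &lo, &hi);
  /* lo is (const_int 0x55667788), hi is (const_int 0x11223344).  */
#endif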
7713 /* Split one or more TImode RTL references into pairs of DImode
7714 references. The RTL can be REG, offsettable MEM, integer constant, or
7715 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7716 split and "num" is its length. lo_half and hi_half are output arrays
7717 that parallel "operands". */
7719 void
7720 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7722 while (num--)
7724 rtx op = operands[num];
7726 /* simplify_subreg refuses to split volatile memory addresses, but we
7727 still have to handle them. */
7728 if (GET_CODE (op) == MEM)
7730 lo_half[num] = adjust_address (op, DImode, 0);
7731 hi_half[num] = adjust_address (op, DImode, 8);
7733 else
7735 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7736 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7741 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
7742 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7743 is the expression of the binary operation. The output may either be
7744 emitted here, or returned to the caller, like all output_* functions.
7746 There is no guarantee that the operands are the same mode, as they
7747 might be within FLOAT or FLOAT_EXTEND expressions. */
7749 #ifndef SYSV386_COMPAT
7750 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
7751 wants to fix the assemblers because that causes incompatibility
7752 with gcc. No-one wants to fix gcc because that causes
7753 incompatibility with assemblers... You can use the option of
7754 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
7755 #define SYSV386_COMPAT 1
7756 #endif
7758 const char *
7759 output_387_binary_op (rtx insn, rtx *operands)
7761 static char buf[30];
7762 const char *p;
7763 const char *ssep;
7764 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7766 #ifdef ENABLE_CHECKING
7767 /* Even if we do not want to check the inputs, this documents the input
7768 constraints, which helps in understanding the following code. */
7769 if (STACK_REG_P (operands[0])
7770 && ((REG_P (operands[1])
7771 && REGNO (operands[0]) == REGNO (operands[1])
7772 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7773 || (REG_P (operands[2])
7774 && REGNO (operands[0]) == REGNO (operands[2])
7775 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7776 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7777 ; /* ok */
7778 else
7779 gcc_assert (is_sse);
7780 #endif
7782 switch (GET_CODE (operands[3]))
7784 case PLUS:
7785 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7786 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7787 p = "fiadd";
7788 else
7789 p = "fadd";
7790 ssep = "add";
7791 break;
7793 case MINUS:
7794 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7795 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7796 p = "fisub";
7797 else
7798 p = "fsub";
7799 ssep = "sub";
7800 break;
7802 case MULT:
7803 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7804 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7805 p = "fimul";
7806 else
7807 p = "fmul";
7808 ssep = "mul";
7809 break;
7811 case DIV:
7812 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7813 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7814 p = "fidiv";
7815 else
7816 p = "fdiv";
7817 ssep = "div";
7818 break;
7820 default:
7821 gcc_unreachable ();
7824 if (is_sse)
7826 strcpy (buf, ssep);
7827 if (GET_MODE (operands[0]) == SFmode)
7828 strcat (buf, "ss\t{%2, %0|%0, %2}");
7829 else
7830 strcat (buf, "sd\t{%2, %0|%0, %2}");
7831 return buf;
7833 strcpy (buf, p);
7835 switch (GET_CODE (operands[3]))
7837 case MULT:
7838 case PLUS:
7839 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7841 rtx temp = operands[2];
7842 operands[2] = operands[1];
7843 operands[1] = temp;
7846 /* Now we know operands[0] == operands[1]. */
7848 if (GET_CODE (operands[2]) == MEM)
7850 p = "%z2\t%2";
7851 break;
7854 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7856 if (STACK_TOP_P (operands[0]))
7857 /* How is it that we are storing to a dead operand[2]?
7858 Well, presumably operands[1] is dead too. We can't
7859 store the result to st(0) as st(0) gets popped on this
7860 instruction. Instead store to operands[2] (which I
7861 think has to be st(1)). st(1) will be popped later.
7862 gcc <= 2.8.1 didn't have this check and generated
7863 assembly code that the Unixware assembler rejected. */
7864 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7865 else
7866 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7867 break;
7870 if (STACK_TOP_P (operands[0]))
7871 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7872 else
7873 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7874 break;
7876 case MINUS:
7877 case DIV:
7878 if (GET_CODE (operands[1]) == MEM)
7880 p = "r%z1\t%1";
7881 break;
7884 if (GET_CODE (operands[2]) == MEM)
7886 p = "%z2\t%2";
7887 break;
7890 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7892 #if SYSV386_COMPAT
7893 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7894 derived assemblers, confusingly reverse the direction of
7895 the operation for fsub{r} and fdiv{r} when the
7896 destination register is not st(0). The Intel assembler
7897 doesn't have this brain damage. Read !SYSV386_COMPAT to
7898 figure out what the hardware really does. */
7899 if (STACK_TOP_P (operands[0]))
7900 p = "{p\t%0, %2|rp\t%2, %0}";
7901 else
7902 p = "{rp\t%2, %0|p\t%0, %2}";
7903 #else
7904 if (STACK_TOP_P (operands[0]))
7905 /* As above for fmul/fadd, we can't store to st(0). */
7906 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
7907 else
7908 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7909 #endif
7910 break;
7913 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7915 #if SYSV386_COMPAT
7916 if (STACK_TOP_P (operands[0]))
7917 p = "{rp\t%0, %1|p\t%1, %0}";
7918 else
7919 p = "{p\t%1, %0|rp\t%0, %1}";
7920 #else
7921 if (STACK_TOP_P (operands[0]))
7922 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7923 else
7924 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7925 #endif
7926 break;
7929 if (STACK_TOP_P (operands[0]))
7931 if (STACK_TOP_P (operands[1]))
7932 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
7933 else
7934 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
7935 break;
7937 else if (STACK_TOP_P (operands[1]))
7939 #if SYSV386_COMPAT
7940 p = "{\t%1, %0|r\t%0, %1}";
7941 #else
7942 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7943 #endif
7945 else
7947 #if SYSV386_COMPAT
7948 p = "{r\t%2, %0|\t%0, %2}";
7949 #else
7950 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7951 #endif
7953 break;
7955 default:
7956 gcc_unreachable ();
7959 strcat (buf, p);
7960 return buf;
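/* Illustrative examples (not part of the original source) of the templates
   built above: an SSE DFmode add returns "addsd\t{%2, %0|%0, %2}"; an x87 add
   whose second operand dies and whose destination is not st(0) returns
   "faddp\t{%2, %0|%0, %2}"; a memory second operand instead yields
   "fadd%z2\t%2".  */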
7963 /* Return needed mode for entity in optimize_mode_switching pass. */
7966 ix86_mode_needed (int entity, rtx insn)
7968 enum attr_i387_cw mode;
7970 /* The mode UNINITIALIZED is used to store the control word after a
7971 function call or ASM pattern. The mode ANY specifies that the function
7972 has no requirements on the control word and makes no changes in the
7973 bits we are interested in. */
7975 if (CALL_P (insn)
7976 || (NONJUMP_INSN_P (insn)
7977 && (asm_noperands (PATTERN (insn)) >= 0
7978 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
7979 return I387_CW_UNINITIALIZED;
7981 if (recog_memoized (insn) < 0)
7982 return I387_CW_ANY;
7984 mode = get_attr_i387_cw (insn);
7986 switch (entity)
7988 case I387_TRUNC:
7989 if (mode == I387_CW_TRUNC)
7990 return mode;
7991 break;
7993 case I387_FLOOR:
7994 if (mode == I387_CW_FLOOR)
7995 return mode;
7996 break;
7998 case I387_CEIL:
7999 if (mode == I387_CW_CEIL)
8000 return mode;
8001 break;
8003 case I387_MASK_PM:
8004 if (mode == I387_CW_MASK_PM)
8005 return mode;
8006 break;
8008 default:
8009 gcc_unreachable ();
8012 return I387_CW_ANY;
8015 /* Output code to initialize control word copies used by trunc?f?i and
8016 rounding patterns. CURRENT_MODE is set to the current control word,
8017 while NEW_MODE is set to the new control word. */
8019 void
8020 emit_i387_cw_initialization (int mode)
8022 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8023 rtx new_mode;
8025 int slot;
8027 rtx reg = gen_reg_rtx (HImode);
8029 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8030 emit_move_insn (reg, stored_mode);
8032 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8034 switch (mode)
8036 case I387_CW_TRUNC:
8037 /* round toward zero (truncate) */
8038 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8039 slot = SLOT_CW_TRUNC;
8040 break;
8042 case I387_CW_FLOOR:
8043 /* round down toward -oo */
8044 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8046 slot = SLOT_CW_FLOOR;
8047 break;
8049 case I387_CW_CEIL:
8050 /* round up toward +oo */
8051 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8053 slot = SLOT_CW_CEIL;
8054 break;
8056 case I387_CW_MASK_PM:
8057 /* mask precision exception for nearbyint() */
8058 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8059 slot = SLOT_CW_MASK_PM;
8060 break;
8062 default:
8063 gcc_unreachable ();
8066 else
8068 switch (mode)
8070 case I387_CW_TRUNC:
8071 /* round toward zero (truncate) */
8072 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8073 slot = SLOT_CW_TRUNC;
8074 break;
8076 case I387_CW_FLOOR:
8077 /* round down toward -oo */
8078 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8079 slot = SLOT_CW_FLOOR;
8080 break;
8082 case I387_CW_CEIL:
8083 /* round up toward +oo */
8084 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8085 slot = SLOT_CW_CEIL;
8086 break;
8088 case I387_CW_MASK_PM:
8089 /* mask precision exception for nearbyint() */
8090 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8091 slot = SLOT_CW_MASK_PM;
8092 break;
8094 default:
8095 gcc_unreachable ();
8099 gcc_assert (slot < MAX_386_STACK_LOCALS);
8101 new_mode = assign_386_stack_local (HImode, slot);
8102 emit_move_insn (new_mode, reg);
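/* A minimal sketch (not part of the original source) of the control word
   encodings used above: the x87 rounding-control field occupies bits 10-11
   of the FPU control word and bit 5 is the precision-exception mask, so with
   a hypothetical default word of 0x037f:  */
#if 0
  unsigned short cw = 0x037f;
  unsigned short cw_trunc   = cw | 0x0c00;              /* RC = 11: truncate */
  unsigned short cw_floor   = (cw & ~0x0c00) | 0x0400;  /* RC = 01: toward -oo */
  unsigned short cw_ceil    = (cw & ~0x0c00) | 0x0800;  /* RC = 10: toward +oo */
  unsigned short cw_mask_pm = cw | 0x0020;               /* mask precision exception */
#endif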
8105 /* Output code for INSN to convert a float to a signed int. OPERANDS
8106 are the insn operands. The output may be [HSD]Imode and the input
8107 operand may be [SDX]Fmode. */
8109 const char *
8110 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8112 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8113 int dimode_p = GET_MODE (operands[0]) == DImode;
8114 int round_mode = get_attr_i387_cw (insn);
8116 /* Jump through a hoop or two for DImode, since the hardware has no
8117 non-popping instruction. We used to do this a different way, but
8118 that was somewhat fragile and broke with post-reload splitters. */
8119 if ((dimode_p || fisttp) && !stack_top_dies)
8120 output_asm_insn ("fld\t%y1", operands);
8122 gcc_assert (STACK_TOP_P (operands[1]));
8123 gcc_assert (GET_CODE (operands[0]) == MEM);
8125 if (fisttp)
8126 output_asm_insn ("fisttp%z0\t%0", operands);
8127 else
8129 if (round_mode != I387_CW_ANY)
8130 output_asm_insn ("fldcw\t%3", operands);
8131 if (stack_top_dies || dimode_p)
8132 output_asm_insn ("fistp%z0\t%0", operands);
8133 else
8134 output_asm_insn ("fist%z0\t%0", operands);
8135 if (round_mode != I387_CW_ANY)
8136 output_asm_insn ("fldcw\t%2", operands);
8139 return "";
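/* Illustrative note (not part of the original source): when a non-default
   rounding mode is needed (round_mode != I387_CW_ANY), the sequence emitted
   above has the shape "fldcw %3" (load the special control word), then
   "fistp%z0 %0" or "fist%z0 %0", then "fldcw %2" (restore the original word);
   the fisttp path instead uses the single SSE3 "fisttp%z0" instruction, which
   always truncates and needs no control-word juggling.  */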
8142 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
8143 should be used. UNORDERED_P is true when fucom should be used. */
8145 const char *
8146 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8148 int stack_top_dies;
8149 rtx cmp_op0, cmp_op1;
8150 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8152 if (eflags_p)
8154 cmp_op0 = operands[0];
8155 cmp_op1 = operands[1];
8157 else
8159 cmp_op0 = operands[1];
8160 cmp_op1 = operands[2];
8163 if (is_sse)
8165 if (GET_MODE (operands[0]) == SFmode)
8166 if (unordered_p)
8167 return "ucomiss\t{%1, %0|%0, %1}";
8168 else
8169 return "comiss\t{%1, %0|%0, %1}";
8170 else
8171 if (unordered_p)
8172 return "ucomisd\t{%1, %0|%0, %1}";
8173 else
8174 return "comisd\t{%1, %0|%0, %1}";
8177 gcc_assert (STACK_TOP_P (cmp_op0));
8179 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8181 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8183 if (stack_top_dies)
8185 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8186 return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8188 else
8189 return "ftst\n\tfnstsw\t%0";
8192 if (STACK_REG_P (cmp_op1)
8193 && stack_top_dies
8194 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8195 && REGNO (cmp_op1) != FIRST_STACK_REG)
8197 /* If the top of the 387 stack dies, and the other operand is also
8198 a stack register that dies, then this must be a `fcompp' float
8199 compare. */
8201 if (eflags_p)
8203 /* There is no double popping fcomi variant. Fortunately,
8204 eflags is immune from the fstp's cc clobbering. */
8205 if (unordered_p)
8206 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8207 else
8208 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8209 return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8211 else
8213 if (unordered_p)
8214 return "fucompp\n\tfnstsw\t%0";
8215 else
8216 return "fcompp\n\tfnstsw\t%0";
8219 else
8221 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
8223 static const char * const alt[16] =
8225 "fcom%z2\t%y2\n\tfnstsw\t%0",
8226 "fcomp%z2\t%y2\n\tfnstsw\t%0",
8227 "fucom%z2\t%y2\n\tfnstsw\t%0",
8228 "fucomp%z2\t%y2\n\tfnstsw\t%0",
8230 "ficom%z2\t%y2\n\tfnstsw\t%0",
8231 "ficomp%z2\t%y2\n\tfnstsw\t%0",
8232 NULL,
8233 NULL,
8235 "fcomi\t{%y1, %0|%0, %y1}",
8236 "fcomip\t{%y1, %0|%0, %y1}",
8237 "fucomi\t{%y1, %0|%0, %y1}",
8238 "fucomip\t{%y1, %0|%0, %y1}",
8240 NULL,
8241 NULL,
8242 NULL,
8243 NULL
8246 int mask;
8247 const char *ret;
8249 mask = eflags_p << 3;
8250 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8251 mask |= unordered_p << 1;
8252 mask |= stack_top_dies;
8254 gcc_assert (mask < 16);
8255 ret = alt[mask];
8256 gcc_assert (ret);
8258 return ret;
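/* Worked example (not part of the original source): a non-popping fcomi
   compare of two FP registers has eflags_p = 1, a float-mode operand, and
   unordered_p = stack_top_dies = 0, so mask = (1 << 3) = 8 and the table
   above yields "fcomi\t{%y1, %0|%0, %y1}"; setting unordered_p and
   stack_top_dies instead gives mask = 11 and "fucomip\t{%y1, %0|%0, %y1}".  */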
8262 void
8263 ix86_output_addr_vec_elt (FILE *file, int value)
8265 const char *directive = ASM_LONG;
8267 #ifdef ASM_QUAD
8268 if (TARGET_64BIT)
8269 directive = ASM_QUAD;
8270 #else
8271 gcc_assert (!TARGET_64BIT);
8272 #endif
8274 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8277 void
8278 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8280 if (TARGET_64BIT)
8281 fprintf (file, "%s%s%d-%s%d\n",
8282 ASM_LONG, LPREFIX, value, LPREFIX, rel);
8283 else if (HAVE_AS_GOTOFF_IN_DATA)
8284 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8285 #if TARGET_MACHO
8286 else if (TARGET_MACHO)
8288 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8289 machopic_output_function_base_name (file);
8290 fprintf(file, "\n");
8292 #endif
8293 else
8294 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8295 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8298 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8299 for the target. */
8301 void
8302 ix86_expand_clear (rtx dest)
8304 rtx tmp;
8306 /* We play register width games, which are only valid after reload. */
8307 gcc_assert (reload_completed);
8309 /* Avoid HImode and its attendant prefix byte. */
8310 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8311 dest = gen_rtx_REG (SImode, REGNO (dest));
8313 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8315 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
8316 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8318 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8319 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8322 emit_insn (tmp);
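/* Illustrative note (not part of the original source): on most tunings the
   pattern emitted above matches movsi_xor and assembles to something like
   "xorl %eax, %eax" rather than "movl $0, %eax", trading a flags clobber for
   a shorter and usually faster instruction; with TARGET_USE_MOV0 and not
   optimizing for size, the plain move is kept instead.  */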
8325 /* X is an unchanging MEM. If it is a constant pool reference, return
8326 the constant pool rtx, else NULL. */
8329 maybe_get_pool_constant (rtx x)
8331 x = ix86_delegitimize_address (XEXP (x, 0));
8333 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8334 return get_pool_constant (x);
8336 return NULL_RTX;
8339 void
8340 ix86_expand_move (enum machine_mode mode, rtx operands[])
8342 int strict = (reload_in_progress || reload_completed);
8343 rtx op0, op1;
8344 enum tls_model model;
8346 op0 = operands[0];
8347 op1 = operands[1];
8349 if (GET_CODE (op1) == SYMBOL_REF)
8351 model = SYMBOL_REF_TLS_MODEL (op1);
8352 if (model)
8354 op1 = legitimize_tls_address (op1, model, true);
8355 op1 = force_operand (op1, op0);
8356 if (op1 == op0)
8357 return;
8360 else if (GET_CODE (op1) == CONST
8361 && GET_CODE (XEXP (op1, 0)) == PLUS
8362 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8364 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8365 if (model)
8367 rtx addend = XEXP (XEXP (op1, 0), 1);
8368 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8369 op1 = force_operand (op1, NULL);
8370 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8371 op0, 1, OPTAB_DIRECT);
8372 if (op1 == op0)
8373 return;
8377 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8379 #if TARGET_MACHO
8380 if (MACHOPIC_PURE)
8382 rtx temp = ((reload_in_progress
8383 || ((op0 && GET_CODE (op0) == REG)
8384 && mode == Pmode))
8385 ? op0 : gen_reg_rtx (Pmode));
8386 op1 = machopic_indirect_data_reference (op1, temp);
8387 op1 = machopic_legitimize_pic_address (op1, mode,
8388 temp == op1 ? 0 : temp);
8390 else if (MACHOPIC_INDIRECT)
8391 op1 = machopic_indirect_data_reference (op1, 0);
8392 if (op0 == op1)
8393 return;
8394 #else
8395 if (GET_CODE (op0) == MEM)
8396 op1 = force_reg (Pmode, op1);
8397 else
8398 op1 = legitimize_address (op1, op1, Pmode);
8399 #endif /* TARGET_MACHO */
8401 else
8403 if (GET_CODE (op0) == MEM
8404 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8405 || !push_operand (op0, mode))
8406 && GET_CODE (op1) == MEM)
8407 op1 = force_reg (mode, op1);
8409 if (push_operand (op0, mode)
8410 && ! general_no_elim_operand (op1, mode))
8411 op1 = copy_to_mode_reg (mode, op1);
8413 /* Force large constants in 64-bit compilation into a register
8414 to get them CSEed. */
8415 if (TARGET_64BIT && mode == DImode
8416 && immediate_operand (op1, mode)
8417 && !x86_64_zext_immediate_operand (op1, VOIDmode)
8418 && !register_operand (op0, mode)
8419 && optimize && !reload_completed && !reload_in_progress)
8420 op1 = copy_to_mode_reg (mode, op1);
8422 if (FLOAT_MODE_P (mode))
8424 /* If we are loading a floating point constant to a register,
8425 force the value to memory now, since we'll get better code
8426 out the back end. */
8428 if (strict)
8430 else if (GET_CODE (op1) == CONST_DOUBLE)
8432 op1 = validize_mem (force_const_mem (mode, op1));
8433 if (!register_operand (op0, mode))
8435 rtx temp = gen_reg_rtx (mode);
8436 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8437 emit_move_insn (op0, temp);
8438 return;
8444 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8447 void
8448 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8450 rtx op0 = operands[0], op1 = operands[1];
8452 /* Force constants other than zero into memory. We do not know how
8453 the instructions used to build constants modify the upper 64 bits
8454 of the register; once we have that information we may be able
8455 to handle some of them more efficiently. */
8456 if ((reload_in_progress | reload_completed) == 0
8457 && register_operand (op0, mode)
8458 && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8459 op1 = validize_mem (force_const_mem (mode, op1));
8461 /* Make operand1 a register if it isn't already. */
8462 if (!no_new_pseudos
8463 && !register_operand (op0, mode)
8464 && !register_operand (op1, mode))
8466 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8467 return;
8470 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8473 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
8474 straight to ix86_expand_vector_move. */
8476 void
8477 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8479 rtx op0, op1, m;
8481 op0 = operands[0];
8482 op1 = operands[1];
8484 if (MEM_P (op1))
8486 /* If we're optimizing for size, movups is the smallest. */
8487 if (optimize_size)
8489 op0 = gen_lowpart (V4SFmode, op0);
8490 op1 = gen_lowpart (V4SFmode, op1);
8491 emit_insn (gen_sse_movups (op0, op1));
8492 return;
8495 /* ??? If we have typed data, then it would appear that using
8496 movdqu is the only way to get unaligned data loaded with
8497 integer type. */
8498 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8500 op0 = gen_lowpart (V16QImode, op0);
8501 op1 = gen_lowpart (V16QImode, op1);
8502 emit_insn (gen_sse2_movdqu (op0, op1));
8503 return;
8506 if (TARGET_SSE2 && mode == V2DFmode)
8508 rtx zero;
8510 /* When SSE registers are split into halves, we can avoid
8511 writing to the top half twice. */
8512 if (TARGET_SSE_SPLIT_REGS)
8514 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8515 zero = op0;
8517 else
8519 /* ??? Not sure about the best option for the Intel chips.
8520 The following would seem to satisfy; the register is
8521 entirely cleared, breaking the dependency chain. We
8522 then store to the upper half, with a dependency depth
8523 of one. A rumor has it that Intel recommends two movsd
8524 followed by an unpacklpd, but this is unconfirmed. And
8525 given that the dependency depth of the unpacklpd would
8526 still be one, I'm not sure why this would be better. */
8527 zero = CONST0_RTX (V2DFmode);
8530 m = adjust_address (op1, DFmode, 0);
8531 emit_insn (gen_sse2_loadlpd (op0, zero, m));
8532 m = adjust_address (op1, DFmode, 8);
8533 emit_insn (gen_sse2_loadhpd (op0, op0, m));
8535 else
8537 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8538 emit_move_insn (op0, CONST0_RTX (mode));
8539 else
8540 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8542 if (mode != V4SFmode)
8543 op0 = gen_lowpart (V4SFmode, op0);
8544 m = adjust_address (op1, V2SFmode, 0);
8545 emit_insn (gen_sse_loadlps (op0, op0, m));
8546 m = adjust_address (op1, V2SFmode, 8);
8547 emit_insn (gen_sse_loadhps (op0, op0, m));
8550 else if (MEM_P (op0))
8552 /* If we're optimizing for size, movups is the smallest. */
8553 if (optimize_size)
8555 op0 = gen_lowpart (V4SFmode, op0);
8556 op1 = gen_lowpart (V4SFmode, op1);
8557 emit_insn (gen_sse_movups (op0, op1));
8558 return;
8561 /* ??? Similar to above, only less clear because of quote
8562 typeless stores unquote. */
8563 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8564 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8566 op0 = gen_lowpart (V16QImode, op0);
8567 op1 = gen_lowpart (V16QImode, op1);
8568 emit_insn (gen_sse2_movdqu (op0, op1));
8569 return;
8572 if (TARGET_SSE2 && mode == V2DFmode)
8574 m = adjust_address (op0, DFmode, 0);
8575 emit_insn (gen_sse2_storelpd (m, op1));
8576 m = adjust_address (op0, DFmode, 8);
8577 emit_insn (gen_sse2_storehpd (m, op1));
8579 else
8581 if (mode != V4SFmode)
8582 op1 = gen_lowpart (V4SFmode, op1);
8583 m = adjust_address (op0, V2SFmode, 0);
8584 emit_insn (gen_sse_storelps (m, op1));
8585 m = adjust_address (op0, V2SFmode, 8);
8586 emit_insn (gen_sse_storehps (m, op1));
8589 else
8590 gcc_unreachable ();
8593 /* Expand a push in MODE. This is some mode for which we do not support
8594 proper push instructions, at least from the registers that we expect
8595 the value to live in. */
8597 void
8598 ix86_expand_push (enum machine_mode mode, rtx x)
8600 rtx tmp;
8602 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8603 GEN_INT (-GET_MODE_SIZE (mode)),
8604 stack_pointer_rtx, 1, OPTAB_DIRECT);
8605 if (tmp != stack_pointer_rtx)
8606 emit_move_insn (stack_pointer_rtx, tmp);
8608 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8609 emit_move_insn (tmp, x);
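/* A minimal sketch (not part of the original source): pushing, say, a
   16-byte TImode value this way first adjusts the stack pointer and then
   stores through it, roughly

       sp = sp - GET_MODE_SIZE (mode);
       *(mode *) sp = x;

   rather than relying on a single push instruction.  */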
8612 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
8613 destination to use for the operation. If different from the true
8614 destination in operands[0], a copy operation will be required. */
8617 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8618 rtx operands[])
8620 int matching_memory;
8621 rtx src1, src2, dst;
8623 dst = operands[0];
8624 src1 = operands[1];
8625 src2 = operands[2];
8627 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8628 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8629 && (rtx_equal_p (dst, src2)
8630 || immediate_operand (src1, mode)))
8632 rtx temp = src1;
8633 src1 = src2;
8634 src2 = temp;
8637 /* If the destination is memory, and we do not have matching source
8638 operands, do things in registers. */
8639 matching_memory = 0;
8640 if (GET_CODE (dst) == MEM)
8642 if (rtx_equal_p (dst, src1))
8643 matching_memory = 1;
8644 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8645 && rtx_equal_p (dst, src2))
8646 matching_memory = 2;
8647 else
8648 dst = gen_reg_rtx (mode);
8651 /* Both source operands cannot be in memory. */
8652 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8654 if (matching_memory != 2)
8655 src2 = force_reg (mode, src2);
8656 else
8657 src1 = force_reg (mode, src1);
8660 /* If the operation is not commutative, source 1 cannot be a constant
8661 or non-matching memory. */
8662 if ((CONSTANT_P (src1)
8663 || (!matching_memory && GET_CODE (src1) == MEM))
8664 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8665 src1 = force_reg (mode, src1);
8667 src1 = operands[1] = src1;
8668 src2 = operands[2] = src2;
8669 return dst;
8672 /* Similarly, but assume that the destination has already been
8673 set up properly. */
8675 void
8676 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8677 enum machine_mode mode, rtx operands[])
8679 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8680 gcc_assert (dst == operands[0]);
8683 /* Attempt to expand a binary operator. Make the expansion closer to the
8684 actual machine than just general_operand, which would allow 3 separate
8685 memory references (one output, two input) in a single insn. */
8687 void
8688 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8689 rtx operands[])
8691 rtx src1, src2, dst, op, clob;
8693 dst = ix86_fixup_binary_operands (code, mode, operands);
8694 src1 = operands[1];
8695 src2 = operands[2];
8697 /* Emit the instruction. */
8699 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8700 if (reload_in_progress)
8702 /* Reload doesn't know about the flags register, and doesn't know that
8703 it doesn't want to clobber it. We can only do this with PLUS. */
8704 gcc_assert (code == PLUS);
8705 emit_insn (op);
8707 else
8709 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8710 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8713 /* Fix up the destination if needed. */
8714 if (dst != operands[0])
8715 emit_move_insn (operands[0], dst);
8718 /* Return TRUE or FALSE depending on whether the binary operator meets the
8719 appropriate constraints. */
8722 ix86_binary_operator_ok (enum rtx_code code,
8723 enum machine_mode mode ATTRIBUTE_UNUSED,
8724 rtx operands[3])
8726 /* Both source operands cannot be in memory. */
8727 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8728 return 0;
8729 /* If the operation is not commutative, source 1 cannot be a constant. */
8730 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8731 return 0;
8732 /* If the destination is memory, we must have a matching source operand. */
8733 if (GET_CODE (operands[0]) == MEM
8734 && ! (rtx_equal_p (operands[0], operands[1])
8735 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8736 && rtx_equal_p (operands[0], operands[2]))))
8737 return 0;
8738 /* If the operation is not commutative and source 1 is memory, we must
8739 have a matching destination. */
8740 if (GET_CODE (operands[1]) == MEM
8741 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8742 && ! rtx_equal_p (operands[0], operands[1]))
8743 return 0;
8744 return 1;
8747 /* Attempt to expand a unary operator. Make the expansion closer to the
8748 actual machine than just general_operand, which would allow 2 separate
8749 memory references (one output, one input) in a single insn. */
8751 void
8752 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8753 rtx operands[])
8755 int matching_memory;
8756 rtx src, dst, op, clob;
8758 dst = operands[0];
8759 src = operands[1];
8761 /* If the destination is memory, and we do not have matching source
8762 operands, do things in registers. */
8763 matching_memory = 0;
8764 if (MEM_P (dst))
8766 if (rtx_equal_p (dst, src))
8767 matching_memory = 1;
8768 else
8769 dst = gen_reg_rtx (mode);
8772 /* When source operand is memory, destination must match. */
8773 if (MEM_P (src) && !matching_memory)
8774 src = force_reg (mode, src);
8776 /* Emit the instruction. */
8778 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8779 if (reload_in_progress || code == NOT)
8781 /* Reload doesn't know about the flags register, and doesn't know that
8782 it doesn't want to clobber it. */
8783 gcc_assert (code == NOT);
8784 emit_insn (op);
8786 else
8788 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8789 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8792 /* Fix up the destination if needed. */
8793 if (dst != operands[0])
8794 emit_move_insn (operands[0], dst);
8797 /* Return TRUE or FALSE depending on whether the unary operator meets the
8798 appropriate constraints. */
8801 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8802 enum machine_mode mode ATTRIBUTE_UNUSED,
8803 rtx operands[2] ATTRIBUTE_UNUSED)
8805 /* If one of operands is memory, source and destination must match. */
8806 if ((GET_CODE (operands[0]) == MEM
8807 || GET_CODE (operands[1]) == MEM)
8808 && ! rtx_equal_p (operands[0], operands[1]))
8809 return FALSE;
8810 return TRUE;
8813 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8814 Create a mask for the sign bit in MODE for an SSE register. If VECT is
8815 true, then replicate the mask for all elements of the vector register.
8816 If INVERT is true, then create a mask excluding the sign bit. */
8819 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8821 enum machine_mode vec_mode;
8822 HOST_WIDE_INT hi, lo;
8823 int shift = 63;
8824 rtvec v;
8825 rtx mask;
8827 /* Find the sign bit, sign extended to 2*HWI. */
8828 if (mode == SFmode)
8829 lo = 0x80000000, hi = lo < 0;
8830 else if (HOST_BITS_PER_WIDE_INT >= 64)
8831 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8832 else
8833 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8835 if (invert)
8836 lo = ~lo, hi = ~hi;
8838 /* Force this value into the low part of a fp vector constant. */
8839 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8840 mask = gen_lowpart (mode, mask);
8842 if (mode == SFmode)
8844 if (vect)
8845 v = gen_rtvec (4, mask, mask, mask, mask);
8846 else
8847 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8848 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8849 vec_mode = V4SFmode;
8851 else
8853 if (vect)
8854 v = gen_rtvec (2, mask, mask);
8855 else
8856 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8857 vec_mode = V2DFmode;
8860 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
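/* A minimal sketch (not part of the original source) of what the masks
   encode, using the DFmode case: the sign bit of an IEEE double is bit 63,
   so the mask and its inverse act on the raw bit image as follows.  */
#if 0
  unsigned long long sign = 0x8000000000000000ULL;  /* mask, invert == false */
  unsigned long long nosign = ~sign;                /* mask, invert == true  */
  unsigned long long bits;                          /* bit image of a double */
  bits ^= sign;     /* negation: flip the sign bit */
  bits &= nosign;   /* absolute value: clear the sign bit */
#endif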
8863 /* Generate code for floating point ABS or NEG. */
8865 void
8866 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8867 rtx operands[])
8869 rtx mask, set, use, clob, dst, src;
8870 bool matching_memory;
8871 bool use_sse = false;
8872 bool vector_mode = VECTOR_MODE_P (mode);
8873 enum machine_mode elt_mode = mode;
8875 if (vector_mode)
8877 elt_mode = GET_MODE_INNER (mode);
8878 use_sse = true;
8880 else if (TARGET_SSE_MATH)
8881 use_sse = SSE_FLOAT_MODE_P (mode);
8883 /* NEG and ABS performed with SSE use bitwise mask operations.
8884 Create the appropriate mask now. */
8885 if (use_sse)
8886 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8887 else
8889 /* When not using SSE, we don't use the mask, but prefer to keep the
8890 same general form of the insn pattern to reduce duplication when
8891 it comes time to split. */
8892 mask = const0_rtx;
8895 dst = operands[0];
8896 src = operands[1];
8898 /* If the destination is memory, and we don't have matching source
8899 operands, do things in registers. */
8900 matching_memory = false;
8901 if (MEM_P (dst))
8903 if (rtx_equal_p (dst, src))
8904 matching_memory = true;
8905 else
8906 dst = gen_reg_rtx (mode);
8908 if (MEM_P (src) && !matching_memory)
8909 src = force_reg (mode, src);
8911 if (vector_mode)
8913 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8914 set = gen_rtx_SET (VOIDmode, dst, set);
8915 emit_insn (set);
8917 else
8919 set = gen_rtx_fmt_e (code, mode, src);
8920 set = gen_rtx_SET (VOIDmode, dst, set);
8921 use = gen_rtx_USE (VOIDmode, mask);
8922 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8923 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8926 if (dst != operands[0])
8927 emit_move_insn (operands[0], dst);
8930 /* Expand a copysign operation. Special case operand 0 being a constant. */
8932 void
8933 ix86_expand_copysign (rtx operands[])
8935 enum machine_mode mode, vmode;
8936 rtx dest, op0, op1, mask, nmask;
8938 dest = operands[0];
8939 op0 = operands[1];
8940 op1 = operands[2];
8942 mode = GET_MODE (dest);
8943 vmode = mode == SFmode ? V4SFmode : V2DFmode;
8945 if (GET_CODE (op0) == CONST_DOUBLE)
8947 rtvec v;
8949 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8950 op0 = simplify_unary_operation (ABS, mode, op0, mode);
8952 if (op0 == CONST0_RTX (mode))
8953 op0 = CONST0_RTX (vmode);
8954 else
8956 if (mode == SFmode)
8957 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8958 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8959 else
8960 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8961 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8964 mask = ix86_build_signbit_mask (mode, 0, 0);
8966 if (mode == SFmode)
8967 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8968 else
8969 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8971 else
8973 nmask = ix86_build_signbit_mask (mode, 0, 1);
8974 mask = ix86_build_signbit_mask (mode, 0, 0);
8976 if (mode == SFmode)
8977 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8978 else
8979 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8983 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
8984 be a constant, and so has already been expanded into a vector constant. */
8986 void
8987 ix86_split_copysign_const (rtx operands[])
8989 enum machine_mode mode, vmode;
8990 rtx dest, op0, op1, mask, x;
8992 dest = operands[0];
8993 op0 = operands[1];
8994 op1 = operands[2];
8995 mask = operands[3];
8997 mode = GET_MODE (dest);
8998 vmode = GET_MODE (mask);
9000 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9001 x = gen_rtx_AND (vmode, dest, mask);
9002 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9004 if (op0 != CONST0_RTX (vmode))
9006 x = gen_rtx_IOR (vmode, dest, op0);
9007 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9011 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9012 so we have to do two masks. */
9014 void
9015 ix86_split_copysign_var (rtx operands[])
9017 enum machine_mode mode, vmode;
9018 rtx dest, scratch, op0, op1, mask, nmask, x;
9020 dest = operands[0];
9021 scratch = operands[1];
9022 op0 = operands[2];
9023 op1 = operands[3];
9024 nmask = operands[4];
9025 mask = operands[5];
9027 mode = GET_MODE (dest);
9028 vmode = GET_MODE (mask);
9030 if (rtx_equal_p (op0, op1))
9032 /* Shouldn't happen often (it's useless, obviously), but when it does
9033 we'd generate incorrect code if we continue below. */
9034 emit_move_insn (dest, op0);
9035 return;
9038 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9040 gcc_assert (REGNO (op1) == REGNO (scratch));
9042 x = gen_rtx_AND (vmode, scratch, mask);
9043 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9045 dest = mask;
9046 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9047 x = gen_rtx_NOT (vmode, dest);
9048 x = gen_rtx_AND (vmode, x, op0);
9049 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9051 else
9053 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9055 x = gen_rtx_AND (vmode, scratch, mask);
9057 else /* alternative 2,4 */
9059 gcc_assert (REGNO (mask) == REGNO (scratch));
9060 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9061 x = gen_rtx_AND (vmode, scratch, op1);
9063 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9065 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9067 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9068 x = gen_rtx_AND (vmode, dest, nmask);
9070 else /* alternative 3,4 */
9072 gcc_assert (REGNO (nmask) == REGNO (dest));
9073 dest = nmask;
9074 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9075 x = gen_rtx_AND (vmode, dest, op0);
9077 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9080 x = gen_rtx_IOR (vmode, dest, scratch);
9081 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9084 /* Return TRUE or FALSE depending on whether the first SET in INSN
9085 has source and destination with matching CC modes, and whether the
9086 CC mode is at least as constrained as REQ_MODE. */
9089 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9091 rtx set;
9092 enum machine_mode set_mode;
9094 set = PATTERN (insn);
9095 if (GET_CODE (set) == PARALLEL)
9096 set = XVECEXP (set, 0, 0);
9097 gcc_assert (GET_CODE (set) == SET);
9098 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9100 set_mode = GET_MODE (SET_DEST (set));
9101 switch (set_mode)
9103 case CCNOmode:
9104 if (req_mode != CCNOmode
9105 && (req_mode != CCmode
9106 || XEXP (SET_SRC (set), 1) != const0_rtx))
9107 return 0;
9108 break;
9109 case CCmode:
9110 if (req_mode == CCGCmode)
9111 return 0;
9112 /* FALLTHRU */
9113 case CCGCmode:
9114 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9115 return 0;
9116 /* FALLTHRU */
9117 case CCGOCmode:
9118 if (req_mode == CCZmode)
9119 return 0;
9120 /* FALLTHRU */
9121 case CCZmode:
9122 break;
9124 default:
9125 gcc_unreachable ();
9128 return (GET_MODE (SET_SRC (set)) == set_mode);
9131 /* Generate insn patterns to do an integer compare of OPERANDS. */
9133 static rtx
9134 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9136 enum machine_mode cmpmode;
9137 rtx tmp, flags;
9139 cmpmode = SELECT_CC_MODE (code, op0, op1);
9140 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9142 /* This is very simple, but making the interface the same as in the
9143 FP case makes the rest of the code easier. */
9144 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9145 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9147 /* Return the test that should be put into the flags user, i.e.
9148 the bcc, scc, or cmov instruction. */
9149 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9152 /* Figure out whether to use ordered or unordered fp comparisons.
9153 Return the appropriate mode to use. */
9155 enum machine_mode
9156 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9158 /* ??? In order to make all comparisons reversible, we do all comparisons
9159 non-trapping when compiling for IEEE. Once gcc is able to distinguish
9160 trapping from nontrapping comparisons in all forms, we can make inequality
9161 comparisons trapping again, since that results in better code when using
9162 FCOM based compares. */
9163 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9166 enum machine_mode
9167 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9169 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9170 return ix86_fp_compare_mode (code);
9171 switch (code)
9173 /* Only zero flag is needed. */
9174 case EQ: /* ZF=0 */
9175 case NE: /* ZF!=0 */
9176 return CCZmode;
9177 /* Codes needing carry flag. */
9178 case GEU: /* CF=0 */
9179 case GTU: /* CF=0 & ZF=0 */
9180 case LTU: /* CF=1 */
9181 case LEU: /* CF=1 | ZF=1 */
9182 return CCmode;
9183 /* Codes possibly doable only with sign flag when
9184 comparing against zero. */
9185 case GE: /* SF=OF or SF=0 */
9186 case LT: /* SF<>OF or SF=1 */
9187 if (op1 == const0_rtx)
9188 return CCGOCmode;
9189 else
9190 /* For other cases Carry flag is not required. */
9191 return CCGCmode;
9192 /* Codes doable only with the sign flag when comparing
9193 against zero, but we lack a jump instruction for it,
9194 so we need to use relational tests against overflow,
9195 which thus needs to be zero. */
9196 case GT: /* ZF=0 & SF=OF */
9197 case LE: /* ZF=1 | SF<>OF */
9198 if (op1 == const0_rtx)
9199 return CCNOmode;
9200 else
9201 return CCGCmode;
9202 /* The strcmp pattern does (use flags), and combine may ask us for the
9203 proper mode. */
9204 case USE:
9205 return CCmode;
9206 default:
9207 gcc_unreachable ();
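/* Illustrative note (not part of the original source): an unsigned compare
   such as (gtu x y) needs the carry flag and therefore full CCmode, while
   (eq x y) only needs ZF and gets CCZmode, and a signed (lt x (const_int 0))
   only needs SF and can use the less constrained CCGOCmode.  */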
9211 /* Return the fixed registers used for condition codes. */
9213 static bool
9214 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9216 *p1 = FLAGS_REG;
9217 *p2 = FPSR_REG;
9218 return true;
9221 /* If two condition code modes are compatible, return a condition code
9222 mode which is compatible with both. Otherwise, return
9223 VOIDmode. */
9225 static enum machine_mode
9226 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9228 if (m1 == m2)
9229 return m1;
9231 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9232 return VOIDmode;
9234 if ((m1 == CCGCmode && m2 == CCGOCmode)
9235 || (m1 == CCGOCmode && m2 == CCGCmode))
9236 return CCGCmode;
9238 switch (m1)
9240 default:
9241 gcc_unreachable ();
9243 case CCmode:
9244 case CCGCmode:
9245 case CCGOCmode:
9246 case CCNOmode:
9247 case CCZmode:
9248 switch (m2)
9250 default:
9251 return VOIDmode;
9253 case CCmode:
9254 case CCGCmode:
9255 case CCGOCmode:
9256 case CCNOmode:
9257 case CCZmode:
9258 return CCmode;
9261 case CCFPmode:
9262 case CCFPUmode:
9263 /* These are only compatible with themselves, which we already
9264 checked above. */
9265 return VOIDmode;
9269 /* Return true if we should use an FCOMI instruction for this fp comparison. */
9272 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9274 enum rtx_code swapped_code = swap_condition (code);
9275 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9276 || (ix86_fp_comparison_cost (swapped_code)
9277 == ix86_fp_comparison_fcomi_cost (swapped_code)));
9280 /* Swap, force into registers, or otherwise massage the two operands
9281 to a fp comparison. The operands are updated in place; the new
9282 comparison code is returned. */
9284 static enum rtx_code
9285 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9287 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9288 rtx op0 = *pop0, op1 = *pop1;
9289 enum machine_mode op_mode = GET_MODE (op0);
9290 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9292 /* All of the unordered compare instructions only work on registers.
9293 The same is true of the fcomi compare instructions. The XFmode
9294 compare instructions require registers except when comparing
9295 against zero or when converting operand 1 from fixed point to
9296 floating point. */
9298 if (!is_sse
9299 && (fpcmp_mode == CCFPUmode
9300 || (op_mode == XFmode
9301 && ! (standard_80387_constant_p (op0) == 1
9302 || standard_80387_constant_p (op1) == 1)
9303 && GET_CODE (op1) != FLOAT)
9304 || ix86_use_fcomi_compare (code)))
9306 op0 = force_reg (op_mode, op0);
9307 op1 = force_reg (op_mode, op1);
9309 else
9311 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
9312 things around if they appear profitable, otherwise force op0
9313 into a register. */
9315 if (standard_80387_constant_p (op0) == 0
9316 || (GET_CODE (op0) == MEM
9317 && ! (standard_80387_constant_p (op1) == 0
9318 || GET_CODE (op1) == MEM)))
9320 rtx tmp;
9321 tmp = op0, op0 = op1, op1 = tmp;
9322 code = swap_condition (code);
9325 if (GET_CODE (op0) != REG)
9326 op0 = force_reg (op_mode, op0);
9328 if (CONSTANT_P (op1))
9330 int tmp = standard_80387_constant_p (op1);
9331 if (tmp == 0)
9332 op1 = validize_mem (force_const_mem (op_mode, op1));
9333 else if (tmp == 1)
9335 if (TARGET_CMOVE)
9336 op1 = force_reg (op_mode, op1);
9338 else
9339 op1 = force_reg (op_mode, op1);
9343 /* Try to rearrange the comparison to make it cheaper. */
9344 if (ix86_fp_comparison_cost (code)
9345 > ix86_fp_comparison_cost (swap_condition (code))
9346 && (GET_CODE (op1) == REG || !no_new_pseudos))
9348 rtx tmp;
9349 tmp = op0, op0 = op1, op1 = tmp;
9350 code = swap_condition (code);
9351 if (GET_CODE (op0) != REG)
9352 op0 = force_reg (op_mode, op0);
9355 *pop0 = op0;
9356 *pop1 = op1;
9357 return code;
9360 /* Convert the comparison codes we use to represent FP comparisons to an
9361 integer code that will result in a proper branch. Return UNKNOWN if no
9362 such code is available. */
9364 enum rtx_code
9365 ix86_fp_compare_code_to_integer (enum rtx_code code)
9367 switch (code)
9369 case GT:
9370 return GTU;
9371 case GE:
9372 return GEU;
9373 case ORDERED:
9374 case UNORDERED:
9375 return code;
9376 break;
9377 case UNEQ:
9378 return EQ;
9379 break;
9380 case UNLT:
9381 return LTU;
9382 break;
9383 case UNLE:
9384 return LEU;
9385 break;
9386 case LTGT:
9387 return NE;
9388 break;
9389 default:
9390 return UNKNOWN;
9394 /* Split comparison code CODE into comparisons we can do using branch
9395 instructions. BYPASS_CODE is the comparison code for a branch that will
9396 branch around FIRST_CODE and SECOND_CODE. If one of the branches
9397 is not required, its value is set to UNKNOWN.
9398 We never require more than two branches. */
9400 void
9401 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9402 enum rtx_code *first_code,
9403 enum rtx_code *second_code)
9405 *first_code = code;
9406 *bypass_code = UNKNOWN;
9407 *second_code = UNKNOWN;
9409 /* The fcomi comparison sets flags as follows:
9411 cmp ZF PF CF
9412 > 0 0 0
9413 < 0 0 1
9414 = 1 0 0
9415 un 1 1 1 */
9417 switch (code)
9419 case GT: /* GTU - CF=0 & ZF=0 */
9420 case GE: /* GEU - CF=0 */
9421 case ORDERED: /* PF=0 */
9422 case UNORDERED: /* PF=1 */
9423 case UNEQ: /* EQ - ZF=1 */
9424 case UNLT: /* LTU - CF=1 */
9425 case UNLE: /* LEU - CF=1 | ZF=1 */
9426 case LTGT: /* EQ - ZF=0 */
9427 break;
9428 case LT: /* LTU - CF=1 - fails on unordered */
9429 *first_code = UNLT;
9430 *bypass_code = UNORDERED;
9431 break;
9432 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
9433 *first_code = UNLE;
9434 *bypass_code = UNORDERED;
9435 break;
9436 case EQ: /* EQ - ZF=1 - fails on unordered */
9437 *first_code = UNEQ;
9438 *bypass_code = UNORDERED;
9439 break;
9440 case NE: /* NE - ZF=0 - fails on unordered */
9441 *first_code = LTGT;
9442 *second_code = UNORDERED;
9443 break;
9444 case UNGE: /* GEU - CF=0 - fails on unordered */
9445 *first_code = GE;
9446 *second_code = UNORDERED;
9447 break;
9448 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
9449 *first_code = GT;
9450 *second_code = UNORDERED;
9451 break;
9452 default:
9453 gcc_unreachable ();
9455 if (!TARGET_IEEE_FP)
9457 *second_code = UNKNOWN;
9458 *bypass_code = UNKNOWN;
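/* Worked example (not part of the original source): for code == EQ with
   TARGET_IEEE_FP, the function above sets *first_code = UNEQ and
   *bypass_code = UNORDERED, i.e. first branch around the whole test when the
   operands are unordered (PF set), then branch on ZF for the actual
   equality; without IEEE both extra codes are reset to UNKNOWN.  */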
9462 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9463 All of the following functions use the number of instructions as the cost metric.
9464 In the future this should be tweaked to compute bytes for optimize_size and
9465 take into account the performance of various instructions on various CPUs. */
9466 static int
9467 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9469 if (!TARGET_IEEE_FP)
9470 return 4;
9471 /* The cost of code output by ix86_expand_fp_compare. */
9472 switch (code)
9474 case UNLE:
9475 case UNLT:
9476 case LTGT:
9477 case GT:
9478 case GE:
9479 case UNORDERED:
9480 case ORDERED:
9481 case UNEQ:
9482 return 4;
9483 break;
9484 case LT:
9485 case NE:
9486 case EQ:
9487 case UNGE:
9488 return 5;
9489 break;
9490 case LE:
9491 case UNGT:
9492 return 6;
9493 break;
9494 default:
9495 gcc_unreachable ();
9499 /* Return cost of comparison done using fcomi operation.
9500 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9501 static int
9502 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9504 enum rtx_code bypass_code, first_code, second_code;
9505 /* Return arbitrarily high cost when instruction is not supported - this
9506 prevents gcc from using it. */
9507 if (!TARGET_CMOVE)
9508 return 1024;
9509 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9510 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9513 /* Return cost of comparison done using sahf operation.
9514 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9515 static int
9516 ix86_fp_comparison_sahf_cost (enum rtx_code code)
9518 enum rtx_code bypass_code, first_code, second_code;
9519 /* Return an arbitrarily high cost when the instruction is not preferred - this
9520 keeps gcc from using it. */
9521 if (!TARGET_USE_SAHF && !optimize_size)
9522 return 1024;
9523 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9524 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9527 /* Compute cost of the comparison done using any method.
9528 See ix86_fp_comparison_arithmetics_cost for the metrics. */
9529 static int
9530 ix86_fp_comparison_cost (enum rtx_code code)
9532 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9533 int min;
9535 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9536 sahf_cost = ix86_fp_comparison_sahf_cost (code);
9538 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9539 if (min > sahf_cost)
9540 min = sahf_cost;
9541 if (min > fcomi_cost)
9542 min = fcomi_cost;
9543 return min;
9546 /* Generate insn patterns to do a floating point compare of OPERANDS. */
9548 static rtx
9549 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9550 rtx *second_test, rtx *bypass_test)
9552 enum machine_mode fpcmp_mode, intcmp_mode;
9553 rtx tmp, tmp2;
9554 int cost = ix86_fp_comparison_cost (code);
9555 enum rtx_code bypass_code, first_code, second_code;
9557 fpcmp_mode = ix86_fp_compare_mode (code);
9558 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9560 if (second_test)
9561 *second_test = NULL_RTX;
9562 if (bypass_test)
9563 *bypass_test = NULL_RTX;
9565 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9567 /* Do fcomi/sahf based test when profitable. */
9568 if ((bypass_code == UNKNOWN || bypass_test)
9569 && (second_code == UNKNOWN || second_test)
9570 && ix86_fp_comparison_arithmetics_cost (code) > cost)
9572 if (TARGET_CMOVE)
9574 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9575 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9576 tmp);
9577 emit_insn (tmp);
9579 else
9581 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9582 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9583 if (!scratch)
9584 scratch = gen_reg_rtx (HImode);
9585 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9586 emit_insn (gen_x86_sahf_1 (scratch));
9589 /* The FP codes work out to act like unsigned. */
9590 intcmp_mode = fpcmp_mode;
9591 code = first_code;
9592 if (bypass_code != UNKNOWN)
9593 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9594 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9595 const0_rtx);
9596 if (second_code != UNKNOWN)
9597 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9598 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9599 const0_rtx);
9601 else
9603 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
9604 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9605 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9606 if (!scratch)
9607 scratch = gen_reg_rtx (HImode);
9608 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9610 /* In the unordered case, we have to check C2 for NaN's, which
9611 doesn't happen to work out to anything nice combination-wise.
9612 So do some bit twiddling on the value we've got in AH to come
9613 up with an appropriate set of condition codes. */
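/* (Added note, not in the original source: after fnstsw the x87 condition
   bits land in the high byte of the status word as C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so the constants 0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 tested
   below are simply combinations of those bits.)  */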
9615 intcmp_mode = CCNOmode;
9616 switch (code)
9618 case GT:
9619 case UNGT:
9620 if (code == GT || !TARGET_IEEE_FP)
9622 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9623 code = EQ;
9625 else
9627 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9628 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9629 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9630 intcmp_mode = CCmode;
9631 code = GEU;
9633 break;
9634 case LT:
9635 case UNLT:
9636 if (code == LT && TARGET_IEEE_FP)
9638 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9639 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9640 intcmp_mode = CCmode;
9641 code = EQ;
9643 else
9645 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9646 code = NE;
9648 break;
9649 case GE:
9650 case UNGE:
9651 if (code == GE || !TARGET_IEEE_FP)
9653 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9654 code = EQ;
9656 else
9658 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9659 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9660 GEN_INT (0x01)));
9661 code = NE;
9663 break;
9664 case LE:
9665 case UNLE:
9666 if (code == LE && TARGET_IEEE_FP)
9668 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9669 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9670 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9671 intcmp_mode = CCmode;
9672 code = LTU;
9674 else
9676 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9677 code = NE;
9679 break;
9680 case EQ:
9681 case UNEQ:
9682 if (code == EQ && TARGET_IEEE_FP)
9684 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9685 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9686 intcmp_mode = CCmode;
9687 code = EQ;
9689 else
9691 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9692 code = NE;
9693 break;
9695 break;
9696 case NE:
9697 case LTGT:
9698 if (code == NE && TARGET_IEEE_FP)
9700 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9701 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9702 GEN_INT (0x40)));
9703 code = NE;
9705 else
9707 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9708 code = EQ;
9710 break;
9712 case UNORDERED:
9713 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9714 code = NE;
9715 break;
9716 case ORDERED:
9717 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9718 code = EQ;
9719 break;
9721 default:
9722 gcc_unreachable ();
9726 /* Return the test that should be put into the flags user, i.e.
9727 the bcc, scc, or cmov instruction. */
9728 return gen_rtx_fmt_ee (code, VOIDmode,
9729 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9730 const0_rtx);
9734 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9736 rtx op0, op1, ret;
9737 op0 = ix86_compare_op0;
9738 op1 = ix86_compare_op1;
9740 if (second_test)
9741 *second_test = NULL_RTX;
9742 if (bypass_test)
9743 *bypass_test = NULL_RTX;
9745 if (ix86_compare_emitted)
9747 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9748 ix86_compare_emitted = NULL_RTX;
9750 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9751 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9752 second_test, bypass_test);
9753 else
9754 ret = ix86_expand_int_compare (code, op0, op1);
9756 return ret;
9759 /* Return true if the CODE will result in nontrivial jump sequence. */
9760 bool
9761 ix86_fp_jump_nontrivial_p (enum rtx_code code)
9763 enum rtx_code bypass_code, first_code, second_code;
9764 if (!TARGET_CMOVE)
9765 return true;
9766 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9767 return bypass_code != UNKNOWN || second_code != UNKNOWN;
9770 void
9771 ix86_expand_branch (enum rtx_code code, rtx label)
9773 rtx tmp;
9775 switch (GET_MODE (ix86_compare_op0))
9777 case QImode:
9778 case HImode:
9779 case SImode:
9780 simple:
9781 tmp = ix86_expand_compare (code, NULL, NULL);
9782 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9783 gen_rtx_LABEL_REF (VOIDmode, label),
9784 pc_rtx);
9785 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9786 return;
9788 case SFmode:
9789 case DFmode:
9790 case XFmode:
9792 rtvec vec;
9793 int use_fcomi;
9794 enum rtx_code bypass_code, first_code, second_code;
9796 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9797 &ix86_compare_op1);
9799 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9801 /* Check whether we will use the natural sequence with one jump. If
9802 so, we can expand the jump early. Otherwise delay expansion by
9803 creating a compound insn so as not to confuse the optimizers. */
9804 if (bypass_code == UNKNOWN && second_code == UNKNOWN
9805 && TARGET_CMOVE)
9807 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9808 gen_rtx_LABEL_REF (VOIDmode, label),
9809 pc_rtx, NULL_RTX, NULL_RTX);
9811 else
9813 tmp = gen_rtx_fmt_ee (code, VOIDmode,
9814 ix86_compare_op0, ix86_compare_op1);
9815 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9816 gen_rtx_LABEL_REF (VOIDmode, label),
9817 pc_rtx);
9818 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9820 use_fcomi = ix86_use_fcomi_compare (code);
9821 vec = rtvec_alloc (3 + !use_fcomi);
9822 RTVEC_ELT (vec, 0) = tmp;
9823 RTVEC_ELT (vec, 1)
9824 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9825 RTVEC_ELT (vec, 2)
9826 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9827 if (! use_fcomi)
9828 RTVEC_ELT (vec, 3)
9829 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9831 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9833 return;
9836 case DImode:
9837 if (TARGET_64BIT)
9838 goto simple;
9839 case TImode:
9840 /* Expand DImode branch into multiple compare+branch. */
9842 rtx lo[2], hi[2], label2;
9843 enum rtx_code code1, code2, code3;
9844 enum machine_mode submode;
9846 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9848 tmp = ix86_compare_op0;
9849 ix86_compare_op0 = ix86_compare_op1;
9850 ix86_compare_op1 = tmp;
9851 code = swap_condition (code);
9853 if (GET_MODE (ix86_compare_op0) == DImode)
9855 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9856 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9857 submode = SImode;
9859 else
9861 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
9862 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
9863 submode = DImode;
9866 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9867 avoid two branches. This costs one extra insn, so disable when
9868 optimizing for size. */
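/* For a 64-bit "a == b" on a 32-bit target this comes out to roughly
   two xors and an or feeding a single jz/jnz, instead of two separate
   compare-and-branch pairs.  */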
9870 if ((code == EQ || code == NE)
9871 && (!optimize_size
9872 || hi[1] == const0_rtx || lo[1] == const0_rtx))
9874 rtx xor0, xor1;
9876 xor1 = hi[0];
9877 if (hi[1] != const0_rtx)
9878 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
9879 NULL_RTX, 0, OPTAB_WIDEN);
9881 xor0 = lo[0];
9882 if (lo[1] != const0_rtx)
9883 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
9884 NULL_RTX, 0, OPTAB_WIDEN);
9886 tmp = expand_binop (submode, ior_optab, xor1, xor0,
9887 NULL_RTX, 0, OPTAB_WIDEN);
9889 ix86_compare_op0 = tmp;
9890 ix86_compare_op1 = const0_rtx;
9891 ix86_expand_branch (code, label);
9892 return;
9895 /* Otherwise, if we are doing less-than or greater-or-equal-than,
9896 op1 is a constant and the low word is zero, then we can just
9897 examine the high word. */
9899 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9900 switch (code)
9902 case LT: case LTU: case GE: case GEU:
9903 ix86_compare_op0 = hi[0];
9904 ix86_compare_op1 = hi[1];
9905 ix86_expand_branch (code, label);
9906 return;
9907 default:
9908 break;
9911 /* Otherwise, we need two or three jumps. */
9913 label2 = gen_label_rtx ();
9915 code1 = code;
9916 code2 = swap_condition (code);
9917 code3 = unsigned_condition (code);
9919 switch (code)
9921 case LT: case GT: case LTU: case GTU:
9922 break;
9924 case LE: code1 = LT; code2 = GT; break;
9925 case GE: code1 = GT; code2 = LT; break;
9926 case LEU: code1 = LTU; code2 = GTU; break;
9927 case GEU: code1 = GTU; code2 = LTU; break;
9929 case EQ: code1 = UNKNOWN; code2 = NE; break;
9930 case NE: code2 = UNKNOWN; break;
9932 default:
9933 gcc_unreachable ();
9937 * a < b =>
9938 * if (hi(a) < hi(b)) goto true;
9939 * if (hi(a) > hi(b)) goto false;
9940 * if (lo(a) < lo(b)) goto true;
9941 * false:
9944 ix86_compare_op0 = hi[0];
9945 ix86_compare_op1 = hi[1];
9947 if (code1 != UNKNOWN)
9948 ix86_expand_branch (code1, label);
9949 if (code2 != UNKNOWN)
9950 ix86_expand_branch (code2, label2);
9952 ix86_compare_op0 = lo[0];
9953 ix86_compare_op1 = lo[1];
9954 ix86_expand_branch (code3, label);
9956 if (code2 != UNKNOWN)
9957 emit_label (label2);
9958 return;
9961 default:
9962 gcc_unreachable ();
9966 /* Split branch based on floating point condition. */
9967 void
9968 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9969 rtx target1, rtx target2, rtx tmp, rtx pushed)
9971 rtx second, bypass;
9972 rtx label = NULL_RTX;
9973 rtx condition;
9974 int bypass_probability = -1, second_probability = -1, probability = -1;
9975 rtx i;
9977 if (target2 != pc_rtx)
9979 rtx tmp = target2;
9980 code = reverse_condition_maybe_unordered (code);
9981 target2 = target1;
9982 target1 = tmp;
9985 condition = ix86_expand_fp_compare (code, op1, op2,
9986 tmp, &second, &bypass);
9988 /* Remove pushed operand from stack. */
9989 if (pushed)
9990 ix86_free_from_memory (GET_MODE (pushed));
9992 if (split_branch_probability >= 0)
9994 /* Distribute the probabilities across the jumps.
9995 Assume that BYPASS and SECOND always test
9996 for UNORDERED. */
9997 probability = split_branch_probability;
9999 /* A value of 1 is low enough that the probability does not need
10000 to be updated. Later we may run some experiments and see
10001 if unordered values are more frequent in practice. */
10002 if (bypass)
10003 bypass_probability = 1;
10004 if (second)
10005 second_probability = 1;
10007 if (bypass != NULL_RTX)
10009 label = gen_label_rtx ();
10010 i = emit_jump_insn (gen_rtx_SET
10011 (VOIDmode, pc_rtx,
10012 gen_rtx_IF_THEN_ELSE (VOIDmode,
10013 bypass,
10014 gen_rtx_LABEL_REF (VOIDmode,
10015 label),
10016 pc_rtx)));
10017 if (bypass_probability >= 0)
10018 REG_NOTES (i)
10019 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10020 GEN_INT (bypass_probability),
10021 REG_NOTES (i));
10023 i = emit_jump_insn (gen_rtx_SET
10024 (VOIDmode, pc_rtx,
10025 gen_rtx_IF_THEN_ELSE (VOIDmode,
10026 condition, target1, target2)));
10027 if (probability >= 0)
10028 REG_NOTES (i)
10029 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10030 GEN_INT (probability),
10031 REG_NOTES (i));
10032 if (second != NULL_RTX)
10034 i = emit_jump_insn (gen_rtx_SET
10035 (VOIDmode, pc_rtx,
10036 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10037 target2)));
10038 if (second_probability >= 0)
10039 REG_NOTES (i)
10040 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10041 GEN_INT (second_probability),
10042 REG_NOTES (i));
10044 if (label != NULL_RTX)
10045 emit_label (label);
10049 ix86_expand_setcc (enum rtx_code code, rtx dest)
10051 rtx ret, tmp, tmpreg, equiv;
10052 rtx second_test, bypass_test;
10054 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10055 return 0; /* FAIL */
10057 gcc_assert (GET_MODE (dest) == QImode);
10059 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10060 PUT_MODE (ret, QImode);
10062 tmp = dest;
10063 tmpreg = dest;
10065 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10066 if (bypass_test || second_test)
10068 rtx test = second_test;
10069 int bypass = 0;
10070 rtx tmp2 = gen_reg_rtx (QImode);
10071 if (bypass_test)
10073 gcc_assert (!second_test);
10074 test = bypass_test;
10075 bypass = 1;
10076 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10078 PUT_MODE (test, QImode);
10079 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10081 if (bypass)
10082 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10083 else
10084 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10087 /* Attach a REG_EQUAL note describing the comparison result. */
10088 if (ix86_compare_op0 && ix86_compare_op1)
10090 equiv = simplify_gen_relational (code, QImode,
10091 GET_MODE (ix86_compare_op0),
10092 ix86_compare_op0, ix86_compare_op1);
10093 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10096 return 1; /* DONE */
10099 /* Expand a comparison setting or clearing the carry flag. Return true when
10100 successful and set *POP to the comparison operation. */
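/* For example, an unsigned "a < b" leaves its result directly in the carry
   flag after a single compare, so callers can consume it with adc/sbb
   instead of needing a setcc or a branch.  */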
10101 static bool
10102 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10104 enum machine_mode mode =
10105 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10107 /* Do not handle DImode compares that go through the special path. Also we
10108 can't deal with FP compares yet. This is possible to add. */
10109 if (mode == (TARGET_64BIT ? TImode : DImode))
10110 return false;
10111 if (FLOAT_MODE_P (mode))
10113 rtx second_test = NULL, bypass_test = NULL;
10114 rtx compare_op, compare_seq;
10116 /* Shortcut: following common codes never translate into carry flag compares. */
10117 if (code == EQ || code == NE || code == UNEQ || code == LTGT
10118 || code == ORDERED || code == UNORDERED)
10119 return false;
10121 /* These comparisons require zero flag; swap operands so they won't. */
10122 if ((code == GT || code == UNLE || code == LE || code == UNGT)
10123 && !TARGET_IEEE_FP)
10125 rtx tmp = op0;
10126 op0 = op1;
10127 op1 = tmp;
10128 code = swap_condition (code);
10131 /* Try to expand the comparison and verify that we end up with a carry flag
10132 based comparison. This fails to be true only when we decide to expand the
10133 comparison using arithmetic, which is not a common scenario. */
10134 start_sequence ();
10135 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10136 &second_test, &bypass_test);
10137 compare_seq = get_insns ();
10138 end_sequence ();
10140 if (second_test || bypass_test)
10141 return false;
10142 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10143 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10144 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10145 else
10146 code = GET_CODE (compare_op);
10147 if (code != LTU && code != GEU)
10148 return false;
10149 emit_insn (compare_seq);
10150 *pop = compare_op;
10151 return true;
10153 if (!INTEGRAL_MODE_P (mode))
10154 return false;
10155 switch (code)
10157 case LTU:
10158 case GEU:
10159 break;
10161 /* Convert a==0 into (unsigned)a<1. */
10162 case EQ:
10163 case NE:
10164 if (op1 != const0_rtx)
10165 return false;
10166 op1 = const1_rtx;
10167 code = (code == EQ ? LTU : GEU);
10168 break;
10170 /* Convert a>b into b<a or a>=b+1. */
10171 case GTU:
10172 case LEU:
10173 if (GET_CODE (op1) == CONST_INT)
10175 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10176 /* Bail out on overflow. We could still swap the operands, but that
10177 would force loading the constant into a register. */
10178 if (op1 == const0_rtx
10179 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10180 return false;
10181 code = (code == GTU ? GEU : LTU);
10183 else
10185 rtx tmp = op1;
10186 op1 = op0;
10187 op0 = tmp;
10188 code = (code == GTU ? LTU : GEU);
10190 break;
10192 /* Convert a>=0 into (unsigned)a<0x80000000. */
10193 case LT:
10194 case GE:
10195 if (mode == DImode || op1 != const0_rtx)
10196 return false;
10197 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10198 code = (code == LT ? GEU : LTU);
10199 break;
10200 case LE:
10201 case GT:
10202 if (mode == DImode || op1 != constm1_rtx)
10203 return false;
10204 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10205 code = (code == LE ? GEU : LTU);
10206 break;
10208 default:
10209 return false;
10211 /* Swapping operands may cause constant to appear as first operand. */
10212 if (!nonimmediate_operand (op0, VOIDmode))
10214 if (no_new_pseudos)
10215 return false;
10216 op0 = force_reg (mode, op0);
10218 ix86_compare_op0 = op0;
10219 ix86_compare_op1 = op1;
10220 *pop = ix86_expand_compare (code, NULL, NULL);
10221 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10222 return true;
10226 ix86_expand_int_movcc (rtx operands[])
10228 enum rtx_code code = GET_CODE (operands[1]), compare_code;
10229 rtx compare_seq, compare_op;
10230 rtx second_test, bypass_test;
10231 enum machine_mode mode = GET_MODE (operands[0]);
10232 bool sign_bit_compare_p = false;
10234 start_sequence ();
10235 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10236 compare_seq = get_insns ();
10237 end_sequence ();
10239 compare_code = GET_CODE (compare_op);
10241 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10242 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10243 sign_bit_compare_p = true;
10245 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10246 HImode insns, we'd be swallowed in word prefix ops. */
10248 if ((mode != HImode || TARGET_FAST_PREFIX)
10249 && (mode != (TARGET_64BIT ? TImode : DImode))
10250 && GET_CODE (operands[2]) == CONST_INT
10251 && GET_CODE (operands[3]) == CONST_INT)
10253 rtx out = operands[0];
10254 HOST_WIDE_INT ct = INTVAL (operands[2]);
10255 HOST_WIDE_INT cf = INTVAL (operands[3]);
10256 HOST_WIDE_INT diff;
10258 diff = ct - cf;
10259 /* Sign bit compares are better done using shifts than by using
10260 sbb. */
10261 if (sign_bit_compare_p
10262 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10263 ix86_compare_op1, &compare_op))
10265 /* Detect overlap between destination and compare sources. */
10266 rtx tmp = out;
10268 if (!sign_bit_compare_p)
10270 bool fpcmp = false;
10272 compare_code = GET_CODE (compare_op);
10274 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10275 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10277 fpcmp = true;
10278 compare_code = ix86_fp_compare_code_to_integer (compare_code);
10281 /* To simplify rest of code, restrict to the GEU case. */
10282 if (compare_code == LTU)
10284 HOST_WIDE_INT tmp = ct;
10285 ct = cf;
10286 cf = tmp;
10287 compare_code = reverse_condition (compare_code);
10288 code = reverse_condition (code);
10290 else
10292 if (fpcmp)
10293 PUT_CODE (compare_op,
10294 reverse_condition_maybe_unordered
10295 (GET_CODE (compare_op)));
10296 else
10297 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10299 diff = ct - cf;
10301 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10302 || reg_overlap_mentioned_p (out, ix86_compare_op1))
10303 tmp = gen_reg_rtx (mode);
10305 if (mode == DImode)
10306 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10307 else
10308 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10310 else
10312 if (code == GT || code == GE)
10313 code = reverse_condition (code);
10314 else
10316 HOST_WIDE_INT tmp = ct;
10317 ct = cf;
10318 cf = tmp;
10319 diff = ct - cf;
10321 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10322 ix86_compare_op1, VOIDmode, 0, -1);
10325 if (diff == 1)
10328 * cmpl op0,op1
10329 * sbbl dest,dest
10330 * [addl dest, ct]
10332 * Size 5 - 8.
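/* The sbb materializes the borrow as 0 or -1 in dest; since ct - cf == 1,
   the optional add of ct then yields either ct or cf.  */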
10334 if (ct)
10335 tmp = expand_simple_binop (mode, PLUS,
10336 tmp, GEN_INT (ct),
10337 copy_rtx (tmp), 1, OPTAB_DIRECT);
10339 else if (cf == -1)
10342 * cmpl op0,op1
10343 * sbbl dest,dest
10344 * orl $ct, dest
10346 * Size 8.
10348 tmp = expand_simple_binop (mode, IOR,
10349 tmp, GEN_INT (ct),
10350 copy_rtx (tmp), 1, OPTAB_DIRECT);
10352 else if (diff == -1 && ct)
10355 * cmpl op0,op1
10356 * sbbl dest,dest
10357 * notl dest
10358 * [addl dest, cf]
10360 * Size 8 - 11.
10362 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10363 if (cf)
10364 tmp = expand_simple_binop (mode, PLUS,
10365 copy_rtx (tmp), GEN_INT (cf),
10366 copy_rtx (tmp), 1, OPTAB_DIRECT);
10368 else
10371 * cmpl op0,op1
10372 * sbbl dest,dest
10373 * [notl dest]
10374 * andl cf - ct, dest
10375 * [addl dest, ct]
10377 * Size 8 - 11.
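/* Here dest holds a 0/-1 mask, so the "and" leaves either 0 or cf - ct,
   and the final add of ct selects ct or cf.  */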
10380 if (cf == 0)
10382 cf = ct;
10383 ct = 0;
10384 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10387 tmp = expand_simple_binop (mode, AND,
10388 copy_rtx (tmp),
10389 gen_int_mode (cf - ct, mode),
10390 copy_rtx (tmp), 1, OPTAB_DIRECT);
10391 if (ct)
10392 tmp = expand_simple_binop (mode, PLUS,
10393 copy_rtx (tmp), GEN_INT (ct),
10394 copy_rtx (tmp), 1, OPTAB_DIRECT);
10397 if (!rtx_equal_p (tmp, out))
10398 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10400 return 1; /* DONE */
10403 if (diff < 0)
10405 HOST_WIDE_INT tmp;
10406 tmp = ct, ct = cf, cf = tmp;
10407 diff = -diff;
10408 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10410 /* We may be reversing an unordered compare to a normal compare, which
10411 is not valid in general (we may convert a non-trapping condition
10412 into a trapping one); however, on i386 we currently emit all
10413 comparisons unordered. */
10414 compare_code = reverse_condition_maybe_unordered (compare_code);
10415 code = reverse_condition_maybe_unordered (code);
10417 else
10419 compare_code = reverse_condition (compare_code);
10420 code = reverse_condition (code);
10424 compare_code = UNKNOWN;
10425 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10426 && GET_CODE (ix86_compare_op1) == CONST_INT)
10428 if (ix86_compare_op1 == const0_rtx
10429 && (code == LT || code == GE))
10430 compare_code = code;
10431 else if (ix86_compare_op1 == constm1_rtx)
10433 if (code == LE)
10434 compare_code = LT;
10435 else if (code == GT)
10436 compare_code = GE;
10440 /* Optimize dest = (op0 < 0) ? -1 : cf. */
10441 if (compare_code != UNKNOWN
10442 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10443 && (cf == -1 || ct == -1))
10445 /* If lea code below could be used, only optimize
10446 if it results in a 2 insn sequence. */
10448 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10449 || diff == 3 || diff == 5 || diff == 9)
10450 || (compare_code == LT && ct == -1)
10451 || (compare_code == GE && cf == -1))
10454 * notl op1 (if necessary)
10455 * sarl $31, op1
10456 * orl cf, op1
10458 if (ct != -1)
10460 cf = ct;
10461 ct = -1;
10462 code = reverse_condition (code);
10465 out = emit_store_flag (out, code, ix86_compare_op0,
10466 ix86_compare_op1, VOIDmode, 0, -1);
10468 out = expand_simple_binop (mode, IOR,
10469 out, GEN_INT (cf),
10470 out, 1, OPTAB_DIRECT);
10471 if (out != operands[0])
10472 emit_move_insn (operands[0], out);
10474 return 1; /* DONE */
10479 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10480 || diff == 3 || diff == 5 || diff == 9)
10481 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10482 && (mode != DImode
10483 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10486 * xorl dest,dest
10487 * cmpl op1,op2
10488 * setcc dest
10489 * lea cf(dest*(ct-cf)),dest
10491 * Size 14.
10493 * This also catches the degenerate setcc-only case.
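/* For example, with ct = 5 and cf = 2 the 0/1 setcc result is turned
   into 5 or 2 by a single "lea 2(dest,dest,2), dest".  */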
10496 rtx tmp;
10497 int nops;
10499 out = emit_store_flag (out, code, ix86_compare_op0,
10500 ix86_compare_op1, VOIDmode, 0, 1);
10502 nops = 0;
10503 /* On x86_64 the lea instruction operates on Pmode, so we need
10504 to get the arithmetic done in the proper mode to match. */
10505 if (diff == 1)
10506 tmp = copy_rtx (out);
10507 else
10509 rtx out1;
10510 out1 = copy_rtx (out);
10511 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10512 nops++;
10513 if (diff & 1)
10515 tmp = gen_rtx_PLUS (mode, tmp, out1);
10516 nops++;
10519 if (cf != 0)
10521 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10522 nops++;
10524 if (!rtx_equal_p (tmp, out))
10526 if (nops == 1)
10527 out = force_operand (tmp, copy_rtx (out));
10528 else
10529 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10531 if (!rtx_equal_p (out, operands[0]))
10532 emit_move_insn (operands[0], copy_rtx (out));
10534 return 1; /* DONE */
10538 * General case: Jumpful:
10539 * xorl dest,dest cmpl op1, op2
10540 * cmpl op1, op2 movl ct, dest
10541 * setcc dest jcc 1f
10542 * decl dest movl cf, dest
10543 * andl (cf-ct),dest 1:
10544 * addl ct,dest
10546 * Size 20. Size 14.
10548 * This is reasonably steep, but branch mispredict costs are
10549 * high on modern cpus, so consider failing only if optimizing
10550 * for space.
10553 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10554 && BRANCH_COST >= 2)
10556 if (cf == 0)
10558 cf = ct;
10559 ct = 0;
10560 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10561 /* We may be reversing an unordered compare to a normal compare,
10562 which is not valid in general (we may convert a non-trapping
10563 condition into a trapping one); however, on i386 we currently
10564 emit all comparisons unordered. */
10565 code = reverse_condition_maybe_unordered (code);
10566 else
10568 code = reverse_condition (code);
10569 if (compare_code != UNKNOWN)
10570 compare_code = reverse_condition (compare_code);
10574 if (compare_code != UNKNOWN)
10576 /* notl op1 (if needed)
10577 sarl $31, op1
10578 andl (cf-ct), op1
10579 addl ct, op1
10581 For x < 0 (resp. x <= -1) there will be no notl,
10582 so if possible swap the constants to get rid of the
10583 complement.
10584 True/false will be -1/0 while code below (store flag
10585 followed by decrement) is 0/-1, so the constants need
10586 to be exchanged once more. */
10588 if (compare_code == GE || !cf)
10590 code = reverse_condition (code);
10591 compare_code = LT;
10593 else
10595 HOST_WIDE_INT tmp = cf;
10596 cf = ct;
10597 ct = tmp;
10600 out = emit_store_flag (out, code, ix86_compare_op0,
10601 ix86_compare_op1, VOIDmode, 0, -1);
10603 else
10605 out = emit_store_flag (out, code, ix86_compare_op0,
10606 ix86_compare_op1, VOIDmode, 0, 1);
10608 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10609 copy_rtx (out), 1, OPTAB_DIRECT);
10612 out = expand_simple_binop (mode, AND, copy_rtx (out),
10613 gen_int_mode (cf - ct, mode),
10614 copy_rtx (out), 1, OPTAB_DIRECT);
10615 if (ct)
10616 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10617 copy_rtx (out), 1, OPTAB_DIRECT);
10618 if (!rtx_equal_p (out, operands[0]))
10619 emit_move_insn (operands[0], copy_rtx (out));
10621 return 1; /* DONE */
10625 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10627 /* Try a few things more with specific constants and a variable. */
10629 optab op;
10630 rtx var, orig_out, out, tmp;
10632 if (BRANCH_COST <= 2)
10633 return 0; /* FAIL */
10635 /* If one of the two operands is an interesting constant, load a
10636 constant with the above and mask it in with a logical operation. */
10638 if (GET_CODE (operands[2]) == CONST_INT)
10640 var = operands[3];
10641 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10642 operands[3] = constm1_rtx, op = and_optab;
10643 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10644 operands[3] = const0_rtx, op = ior_optab;
10645 else
10646 return 0; /* FAIL */
10648 else if (GET_CODE (operands[3]) == CONST_INT)
10650 var = operands[2];
10651 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10652 operands[2] = constm1_rtx, op = and_optab;
10653 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10654 operands[2] = const0_rtx, op = ior_optab;
10655 else
10656 return 0; /* FAIL */
10658 else
10659 return 0; /* FAIL */
10661 orig_out = operands[0];
10662 tmp = gen_reg_rtx (mode);
10663 operands[0] = tmp;
10665 /* Recurse to get the constant loaded. */
10666 if (ix86_expand_int_movcc (operands) == 0)
10667 return 0; /* FAIL */
10669 /* Mask in the interesting variable. */
10670 out = expand_binop (mode, op, var, tmp, orig_out, 0,
10671 OPTAB_WIDEN);
10672 if (!rtx_equal_p (out, orig_out))
10673 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10675 return 1; /* DONE */
10679 * For comparison with above,
10681 * movl cf,dest
10682 * movl ct,tmp
10683 * cmpl op1,op2
10684 * cmovcc tmp,dest
10686 * Size 15.
10689 if (! nonimmediate_operand (operands[2], mode))
10690 operands[2] = force_reg (mode, operands[2]);
10691 if (! nonimmediate_operand (operands[3], mode))
10692 operands[3] = force_reg (mode, operands[3]);
10694 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10696 rtx tmp = gen_reg_rtx (mode);
10697 emit_move_insn (tmp, operands[3]);
10698 operands[3] = tmp;
10700 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10702 rtx tmp = gen_reg_rtx (mode);
10703 emit_move_insn (tmp, operands[2]);
10704 operands[2] = tmp;
10707 if (! register_operand (operands[2], VOIDmode)
10708 && (mode == QImode
10709 || ! register_operand (operands[3], VOIDmode)))
10710 operands[2] = force_reg (mode, operands[2]);
10712 if (mode == QImode
10713 && ! register_operand (operands[3], VOIDmode))
10714 operands[3] = force_reg (mode, operands[3]);
10716 emit_insn (compare_seq);
10717 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10718 gen_rtx_IF_THEN_ELSE (mode,
10719 compare_op, operands[2],
10720 operands[3])));
10721 if (bypass_test)
10722 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10723 gen_rtx_IF_THEN_ELSE (mode,
10724 bypass_test,
10725 copy_rtx (operands[3]),
10726 copy_rtx (operands[0]))));
10727 if (second_test)
10728 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10729 gen_rtx_IF_THEN_ELSE (mode,
10730 second_test,
10731 copy_rtx (operands[2]),
10732 copy_rtx (operands[0]))));
10734 return 1; /* DONE */
10737 /* Swap, force into registers, or otherwise massage the two operands
10738 to an sse comparison with a mask result. Thus we differ a bit from
10739 ix86_prepare_fp_compare_args which expects to produce a flags result.
10741 The DEST operand exists to help determine whether to commute commutative
10742 operators. The POP0/POP1 operands are updated in place. The new
10743 comparison code is returned, or UNKNOWN if not implementable. */
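/* The SSE compare instructions only provide the eq, lt, le, unord, neq,
   nlt, nle and ord predicates, which is why GE, GT, UNLE and UNLT are
   handled below by swapping the operands.  */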
10745 static enum rtx_code
10746 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10747 rtx *pop0, rtx *pop1)
10749 rtx tmp;
10751 switch (code)
10753 case LTGT:
10754 case UNEQ:
10755 /* We have no LTGT as an operator. We could implement it with
10756 NE & ORDERED, but this requires an extra temporary. It's
10757 not clear that it's worth it. */
10758 return UNKNOWN;
10760 case LT:
10761 case LE:
10762 case UNGT:
10763 case UNGE:
10764 /* These are supported directly. */
10765 break;
10767 case EQ:
10768 case NE:
10769 case UNORDERED:
10770 case ORDERED:
10771 /* For commutative operators, try to canonicalize the destination
10772 operand to be first in the comparison - this helps reload to
10773 avoid extra moves. */
10774 if (!dest || !rtx_equal_p (dest, *pop1))
10775 break;
10776 /* FALLTHRU */
10778 case GE:
10779 case GT:
10780 case UNLE:
10781 case UNLT:
10782 /* These are not supported directly. Swap the comparison operands
10783 to transform into something that is supported. */
10784 tmp = *pop0;
10785 *pop0 = *pop1;
10786 *pop1 = tmp;
10787 code = swap_condition (code);
10788 break;
10790 default:
10791 gcc_unreachable ();
10794 return code;
10797 /* Detect conditional moves that exactly match min/max operational
10798 semantics. Note that this is IEEE safe, as long as we don't
10799 interchange the operands.
10801 Returns FALSE if this conditional move doesn't match a MIN/MAX,
10802 and TRUE if the operation is successful and instructions are emitted. */
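/* The SSE min/max instructions are not commutative when one operand is a
   NaN or the operands are zeros of opposite sign - they return the second
   operand in those cases - so the operand order established here must be
   preserved.  */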
10804 static bool
10805 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10806 rtx cmp_op1, rtx if_true, rtx if_false)
10808 enum machine_mode mode;
10809 bool is_min;
10810 rtx tmp;
10812 if (code == LT)
10814 else if (code == UNGE)
10816 tmp = if_true;
10817 if_true = if_false;
10818 if_false = tmp;
10820 else
10821 return false;
10823 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10824 is_min = true;
10825 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10826 is_min = false;
10827 else
10828 return false;
10830 mode = GET_MODE (dest);
10832 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10833 but MODE may be a vector mode and thus not appropriate. */
10834 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10836 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10837 rtvec v;
10839 if_true = force_reg (mode, if_true);
10840 v = gen_rtvec (2, if_true, if_false);
10841 tmp = gen_rtx_UNSPEC (mode, v, u);
10843 else
10845 code = is_min ? SMIN : SMAX;
10846 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10849 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10850 return true;
10853 /* Expand an sse vector comparison. Return the register with the result. */
10855 static rtx
10856 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10857 rtx op_true, rtx op_false)
10859 enum machine_mode mode = GET_MODE (dest);
10860 rtx x;
10862 cmp_op0 = force_reg (mode, cmp_op0);
10863 if (!nonimmediate_operand (cmp_op1, mode))
10864 cmp_op1 = force_reg (mode, cmp_op1);
10866 if (optimize
10867 || reg_overlap_mentioned_p (dest, op_true)
10868 || reg_overlap_mentioned_p (dest, op_false))
10869 dest = gen_reg_rtx (mode);
10871 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10872 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10874 return dest;
10877 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10878 operations. This is used for both scalar and vector conditional moves. */
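/* CMP is expected to be an all-ones/all-zeros mask, as produced by
   ix86_expand_sse_cmp, so the general case below is simply
   dest = (cmp & op_true) | (~cmp & op_false).  */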
10880 static void
10881 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10883 enum machine_mode mode = GET_MODE (dest);
10884 rtx t2, t3, x;
10886 if (op_false == CONST0_RTX (mode))
10888 op_true = force_reg (mode, op_true);
10889 x = gen_rtx_AND (mode, cmp, op_true);
10890 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10892 else if (op_true == CONST0_RTX (mode))
10894 op_false = force_reg (mode, op_false);
10895 x = gen_rtx_NOT (mode, cmp);
10896 x = gen_rtx_AND (mode, x, op_false);
10897 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10899 else
10901 op_true = force_reg (mode, op_true);
10902 op_false = force_reg (mode, op_false);
10904 t2 = gen_reg_rtx (mode);
10905 if (optimize)
10906 t3 = gen_reg_rtx (mode);
10907 else
10908 t3 = dest;
10910 x = gen_rtx_AND (mode, op_true, cmp);
10911 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10913 x = gen_rtx_NOT (mode, cmp);
10914 x = gen_rtx_AND (mode, x, op_false);
10915 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10917 x = gen_rtx_IOR (mode, t3, t2);
10918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10922 /* Expand a floating-point conditional move. Return true if successful. */
10925 ix86_expand_fp_movcc (rtx operands[])
10927 enum machine_mode mode = GET_MODE (operands[0]);
10928 enum rtx_code code = GET_CODE (operands[1]);
10929 rtx tmp, compare_op, second_test, bypass_test;
10931 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10933 enum machine_mode cmode;
10935 /* Since we've no cmove for sse registers, don't force bad register
10936 allocation just to gain access to it. Deny movcc when the
10937 comparison mode doesn't match the move mode. */
10938 cmode = GET_MODE (ix86_compare_op0);
10939 if (cmode == VOIDmode)
10940 cmode = GET_MODE (ix86_compare_op1);
10941 if (cmode != mode)
10942 return 0;
10944 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10945 &ix86_compare_op0,
10946 &ix86_compare_op1);
10947 if (code == UNKNOWN)
10948 return 0;
10950 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10951 ix86_compare_op1, operands[2],
10952 operands[3]))
10953 return 1;
10955 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10956 ix86_compare_op1, operands[2], operands[3]);
10957 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10958 return 1;
10961 /* The floating point conditional move instructions don't directly
10962 support conditions resulting from a signed integer comparison. */
10964 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10966 /* The floating point conditional move instructions don't directly
10967 support signed integer comparisons. */
10969 if (!fcmov_comparison_operator (compare_op, VOIDmode))
10971 gcc_assert (!second_test && !bypass_test);
10972 tmp = gen_reg_rtx (QImode);
10973 ix86_expand_setcc (code, tmp);
10974 code = NE;
10975 ix86_compare_op0 = tmp;
10976 ix86_compare_op1 = const0_rtx;
10977 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10979 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10981 tmp = gen_reg_rtx (mode);
10982 emit_move_insn (tmp, operands[3]);
10983 operands[3] = tmp;
10985 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10987 tmp = gen_reg_rtx (mode);
10988 emit_move_insn (tmp, operands[2]);
10989 operands[2] = tmp;
10992 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10993 gen_rtx_IF_THEN_ELSE (mode, compare_op,
10994 operands[2], operands[3])));
10995 if (bypass_test)
10996 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10997 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10998 operands[3], operands[0])));
10999 if (second_test)
11000 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11001 gen_rtx_IF_THEN_ELSE (mode, second_test,
11002 operands[2], operands[0])));
11004 return 1;
11007 /* Expand a floating-point vector conditional move; a vcond operation
11008 rather than a movcc operation. */
11010 bool
11011 ix86_expand_fp_vcond (rtx operands[])
11013 enum rtx_code code = GET_CODE (operands[3]);
11014 rtx cmp;
11016 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11017 &operands[4], &operands[5]);
11018 if (code == UNKNOWN)
11019 return false;
11021 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11022 operands[5], operands[1], operands[2]))
11023 return true;
11025 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11026 operands[1], operands[2]);
11027 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11028 return true;
11031 /* Expand a signed integral vector conditional move. */
11033 bool
11034 ix86_expand_int_vcond (rtx operands[])
11036 enum machine_mode mode = GET_MODE (operands[0]);
11037 enum rtx_code code = GET_CODE (operands[3]);
11038 bool negate = false;
11039 rtx x, cop0, cop1;
11041 cop0 = operands[4];
11042 cop1 = operands[5];
11044 /* Canonicalize the comparison to EQ, GT, GTU. */
11045 switch (code)
11047 case EQ:
11048 case GT:
11049 case GTU:
11050 break;
11052 case NE:
11053 case LE:
11054 case LEU:
11055 code = reverse_condition (code);
11056 negate = true;
11057 break;
11059 case GE:
11060 case GEU:
11061 code = reverse_condition (code);
11062 negate = true;
11063 /* FALLTHRU */
11065 case LT:
11066 case LTU:
11067 code = swap_condition (code);
11068 x = cop0, cop0 = cop1, cop1 = x;
11069 break;
11071 default:
11072 gcc_unreachable ();
11075 /* Unsigned parallel compare is not supported by the hardware. Play some
11076 tricks to turn this into a signed comparison against 0. */
11077 if (code == GTU)
11079 cop0 = force_reg (mode, cop0);
11081 switch (mode)
11083 case V4SImode:
11085 rtx t1, t2, mask;
11087 /* Perform a parallel modulo subtraction. */
11088 t1 = gen_reg_rtx (mode);
11089 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11091 /* Extract the original sign bit of op0. */
11092 mask = GEN_INT (-0x80000000);
11093 mask = gen_rtx_CONST_VECTOR (mode,
11094 gen_rtvec (4, mask, mask, mask, mask));
11095 mask = force_reg (mode, mask);
11096 t2 = gen_reg_rtx (mode);
11097 emit_insn (gen_andv4si3 (t2, cop0, mask));
11099 /* XOR it back into the result of the subtraction. This results
11100 in the sign bit set iff we saw unsigned underflow. */
11101 x = gen_reg_rtx (mode);
11102 emit_insn (gen_xorv4si3 (x, t1, t2));
11104 code = GT;
11106 break;
11108 case V16QImode:
11109 case V8HImode:
11110 /* Perform a parallel unsigned saturating subtraction. */
11111 x = gen_reg_rtx (mode);
11112 emit_insn (gen_rtx_SET (VOIDmode, x,
11113 gen_rtx_US_MINUS (mode, cop0, cop1)));
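/* The saturating difference is zero exactly when cop0 <= cop1 (unsigned),
   so testing it for equality with zero and toggling NEGATE gives GTU.  */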
11115 code = EQ;
11116 negate = !negate;
11117 break;
11119 default:
11120 gcc_unreachable ();
11123 cop0 = x;
11124 cop1 = CONST0_RTX (mode);
11127 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11128 operands[1+negate], operands[2-negate]);
11130 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11131 operands[2-negate]);
11132 return true;
11135 /* Expand conditional increment or decrement using adc/sbb instructions.
11136 The default case using setcc followed by the conditional move can be
11137 done by generic code. */
11139 ix86_expand_int_addcc (rtx operands[])
11141 enum rtx_code code = GET_CODE (operands[1]);
11142 rtx compare_op;
11143 rtx val = const0_rtx;
11144 bool fpcmp = false;
11145 enum machine_mode mode = GET_MODE (operands[0]);
11147 if (operands[3] != const1_rtx
11148 && operands[3] != constm1_rtx)
11149 return 0;
11150 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11151 ix86_compare_op1, &compare_op))
11152 return 0;
11153 code = GET_CODE (compare_op);
11155 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11156 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11158 fpcmp = true;
11159 code = ix86_fp_compare_code_to_integer (code);
11162 if (code != LTU)
11164 val = constm1_rtx;
11165 if (fpcmp)
11166 PUT_CODE (compare_op,
11167 reverse_condition_maybe_unordered
11168 (GET_CODE (compare_op)));
11169 else
11170 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11172 PUT_MODE (compare_op, mode);
11174 /* Construct either adc or sbb insn. */
11175 if ((code == LTU) == (operands[3] == constm1_rtx))
11177 switch (GET_MODE (operands[0]))
11179 case QImode:
11180 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11181 break;
11182 case HImode:
11183 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11184 break;
11185 case SImode:
11186 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11187 break;
11188 case DImode:
11189 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11190 break;
11191 default:
11192 gcc_unreachable ();
11195 else
11197 switch (GET_MODE (operands[0]))
11199 case QImode:
11200 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11201 break;
11202 case HImode:
11203 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11204 break;
11205 case SImode:
11206 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11207 break;
11208 case DImode:
11209 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11210 break;
11211 default:
11212 gcc_unreachable ();
11215 return 1; /* DONE */
11219 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
11220 works for floating point parameters and non-offsettable memories.
11221 For pushes, it returns just stack offsets; the values will be saved
11222 in the right order. At most three parts are generated. */
11224 static int
11225 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11227 int size;
11229 if (!TARGET_64BIT)
11230 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11231 else
11232 size = (GET_MODE_SIZE (mode) + 4) / 8;
11234 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11235 gcc_assert (size >= 2 && size <= 3);
11237 /* Optimize constant pool reference to immediates. This is used by fp
11238 moves, that force all constants to memory to allow combining. */
11239 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11241 rtx tmp = maybe_get_pool_constant (operand);
11242 if (tmp)
11243 operand = tmp;
11246 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11248 /* The only non-offsettable memories we handle are pushes. */
11249 int ok = push_operand (operand, VOIDmode);
11251 gcc_assert (ok);
11253 operand = copy_rtx (operand);
11254 PUT_MODE (operand, Pmode);
11255 parts[0] = parts[1] = parts[2] = operand;
11256 return size;
11259 if (GET_CODE (operand) == CONST_VECTOR)
11261 enum machine_mode imode = int_mode_for_mode (mode);
11262 /* Caution: if we looked through a constant pool memory above,
11263 the operand may actually have a different mode now. That's
11264 ok, since we want to pun this all the way back to an integer. */
11265 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11266 gcc_assert (operand != NULL);
11267 mode = imode;
11270 if (!TARGET_64BIT)
11272 if (mode == DImode)
11273 split_di (&operand, 1, &parts[0], &parts[1]);
11274 else
11276 if (REG_P (operand))
11278 gcc_assert (reload_completed);
11279 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11280 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11281 if (size == 3)
11282 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11284 else if (offsettable_memref_p (operand))
11286 operand = adjust_address (operand, SImode, 0);
11287 parts[0] = operand;
11288 parts[1] = adjust_address (operand, SImode, 4);
11289 if (size == 3)
11290 parts[2] = adjust_address (operand, SImode, 8);
11292 else if (GET_CODE (operand) == CONST_DOUBLE)
11294 REAL_VALUE_TYPE r;
11295 long l[4];
11297 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11298 switch (mode)
11300 case XFmode:
11301 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11302 parts[2] = gen_int_mode (l[2], SImode);
11303 break;
11304 case DFmode:
11305 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11306 break;
11307 default:
11308 gcc_unreachable ();
11310 parts[1] = gen_int_mode (l[1], SImode);
11311 parts[0] = gen_int_mode (l[0], SImode);
11313 else
11314 gcc_unreachable ();
11317 else
11319 if (mode == TImode)
11320 split_ti (&operand, 1, &parts[0], &parts[1]);
11321 if (mode == XFmode || mode == TFmode)
11323 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11324 if (REG_P (operand))
11326 gcc_assert (reload_completed);
11327 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11328 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11330 else if (offsettable_memref_p (operand))
11332 operand = adjust_address (operand, DImode, 0);
11333 parts[0] = operand;
11334 parts[1] = adjust_address (operand, upper_mode, 8);
11336 else if (GET_CODE (operand) == CONST_DOUBLE)
11338 REAL_VALUE_TYPE r;
11339 long l[4];
11341 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11342 real_to_target (l, &r, mode);
11344 /* Do not use shift by 32 to avoid warning on 32bit systems. */
11345 if (HOST_BITS_PER_WIDE_INT >= 64)
11346 parts[0]
11347 = gen_int_mode
11348 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11349 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11350 DImode);
11351 else
11352 parts[0] = immed_double_const (l[0], l[1], DImode);
11354 if (upper_mode == SImode)
11355 parts[1] = gen_int_mode (l[2], SImode);
11356 else if (HOST_BITS_PER_WIDE_INT >= 64)
11357 parts[1]
11358 = gen_int_mode
11359 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11360 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11361 DImode);
11362 else
11363 parts[1] = immed_double_const (l[2], l[3], DImode);
11365 else
11366 gcc_unreachable ();
11370 return size;
11373 /* Emit insns to perform a move or push of DI, DF, and XF values.
11374 All required insns are emitted by this function itself.
11375 Operands 2-4 contain the input values
11376 in the correct order; operands 5-7 contain the output values. */
11378 void
11379 ix86_split_long_move (rtx operands[])
11381 rtx part[2][3];
11382 int nparts;
11383 int push = 0;
11384 int collisions = 0;
11385 enum machine_mode mode = GET_MODE (operands[0]);
11387 /* The DFmode expanders may ask us to move a double.
11388 For a 64-bit target this is a single move. By hiding that fact
11389 here we simplify the i386.md splitters. */
11390 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11392 /* Optimize constant pool reference to immediates. This is used by
11393 fp moves, that force all constants to memory to allow combining. */
11395 if (GET_CODE (operands[1]) == MEM
11396 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11397 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11398 operands[1] = get_pool_constant (XEXP (operands[1], 0));
11399 if (push_operand (operands[0], VOIDmode))
11401 operands[0] = copy_rtx (operands[0]);
11402 PUT_MODE (operands[0], Pmode);
11404 else
11405 operands[0] = gen_lowpart (DImode, operands[0]);
11406 operands[1] = gen_lowpart (DImode, operands[1]);
11407 emit_move_insn (operands[0], operands[1]);
11408 return;
11411 /* The only non-offsettable memory we handle is push. */
11412 if (push_operand (operands[0], VOIDmode))
11413 push = 1;
11414 else
11415 gcc_assert (GET_CODE (operands[0]) != MEM
11416 || offsettable_memref_p (operands[0]));
11418 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11419 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11421 /* When emitting push, take care for source operands on the stack. */
11422 if (push && GET_CODE (operands[1]) == MEM
11423 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11425 if (nparts == 3)
11426 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11427 XEXP (part[1][2], 0));
11428 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11429 XEXP (part[1][1], 0));
11432 /* We need to do copy in the right order in case an address register
11433 of the source overlaps the destination. */
11434 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11436 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11437 collisions++;
11438 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11439 collisions++;
11440 if (nparts == 3
11441 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11442 collisions++;
11444 /* Collision in the middle part can be handled by reordering. */
11445 if (collisions == 1 && nparts == 3
11446 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11448 rtx tmp;
11449 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11450 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11453 /* If there are more collisions, we can't handle it by reordering.
11454 Do an lea to the last part and use only one colliding move. */
11455 else if (collisions > 1)
11457 rtx base;
11459 collisions = 1;
11461 base = part[0][nparts - 1];
11463 /* Handle the case when the last part isn't valid for lea.
11464 Happens in 64-bit mode storing the 12-byte XFmode. */
11465 if (GET_MODE (base) != Pmode)
11466 base = gen_rtx_REG (Pmode, REGNO (base));
11468 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11469 part[1][0] = replace_equiv_address (part[1][0], base);
11470 part[1][1] = replace_equiv_address (part[1][1],
11471 plus_constant (base, UNITS_PER_WORD));
11472 if (nparts == 3)
11473 part[1][2] = replace_equiv_address (part[1][2],
11474 plus_constant (base, 8));
11478 if (push)
11480 if (!TARGET_64BIT)
11482 if (nparts == 3)
11484 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11485 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11486 emit_move_insn (part[0][2], part[1][2]);
11489 else
11491 /* In 64bit mode we don't have a 32bit push available. In case this is a
11492 register, that is OK - we will just use the larger counterpart. We also
11493 retype the memory - this comes from an attempt to avoid the REX prefix
11494 on moving the second half of a TFmode value. */
11495 if (GET_MODE (part[1][1]) == SImode)
11497 switch (GET_CODE (part[1][1]))
11499 case MEM:
11500 part[1][1] = adjust_address (part[1][1], DImode, 0);
11501 break;
11503 case REG:
11504 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11505 break;
11507 default:
11508 gcc_unreachable ();
11511 if (GET_MODE (part[1][0]) == SImode)
11512 part[1][0] = part[1][1];
11515 emit_move_insn (part[0][1], part[1][1]);
11516 emit_move_insn (part[0][0], part[1][0]);
11517 return;
11520 /* Choose correct order to not overwrite the source before it is copied. */
11521 if ((REG_P (part[0][0])
11522 && REG_P (part[1][1])
11523 && (REGNO (part[0][0]) == REGNO (part[1][1])
11524 || (nparts == 3
11525 && REGNO (part[0][0]) == REGNO (part[1][2]))))
11526 || (collisions > 0
11527 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11529 if (nparts == 3)
11531 operands[2] = part[0][2];
11532 operands[3] = part[0][1];
11533 operands[4] = part[0][0];
11534 operands[5] = part[1][2];
11535 operands[6] = part[1][1];
11536 operands[7] = part[1][0];
11538 else
11540 operands[2] = part[0][1];
11541 operands[3] = part[0][0];
11542 operands[5] = part[1][1];
11543 operands[6] = part[1][0];
11546 else
11548 if (nparts == 3)
11550 operands[2] = part[0][0];
11551 operands[3] = part[0][1];
11552 operands[4] = part[0][2];
11553 operands[5] = part[1][0];
11554 operands[6] = part[1][1];
11555 operands[7] = part[1][2];
11557 else
11559 operands[2] = part[0][0];
11560 operands[3] = part[0][1];
11561 operands[5] = part[1][0];
11562 operands[6] = part[1][1];
11566 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
11567 if (optimize_size)
11569 if (GET_CODE (operands[5]) == CONST_INT
11570 && operands[5] != const0_rtx
11571 && REG_P (operands[2]))
11573 if (GET_CODE (operands[6]) == CONST_INT
11574 && INTVAL (operands[6]) == INTVAL (operands[5]))
11575 operands[6] = operands[2];
11577 if (nparts == 3
11578 && GET_CODE (operands[7]) == CONST_INT
11579 && INTVAL (operands[7]) == INTVAL (operands[5]))
11580 operands[7] = operands[2];
11583 if (nparts == 3
11584 && GET_CODE (operands[6]) == CONST_INT
11585 && operands[6] != const0_rtx
11586 && REG_P (operands[3])
11587 && GET_CODE (operands[7]) == CONST_INT
11588 && INTVAL (operands[7]) == INTVAL (operands[6]))
11589 operands[7] = operands[3];
11592 emit_move_insn (operands[2], operands[5]);
11593 emit_move_insn (operands[3], operands[6]);
11594 if (nparts == 3)
11595 emit_move_insn (operands[4], operands[7]);
11597 return;
11600 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
11601 left shift by a constant, either using a single shift or
11602 a sequence of add instructions. */
11604 static void
11605 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11607 if (count == 1)
11609 emit_insn ((mode == DImode
11610 ? gen_addsi3
11611 : gen_adddi3) (operand, operand, operand));
11613 else if (!optimize_size
11614 && count * ix86_cost->add <= ix86_cost->shift_const)
11616 int i;
11617 for (i=0; i<count; i++)
11619 emit_insn ((mode == DImode
11620 ? gen_addsi3
11621 : gen_adddi3) (operand, operand, operand));
11624 else
11625 emit_insn ((mode == DImode
11626 ? gen_ashlsi3
11627 : gen_ashldi3) (operand, operand, GEN_INT (count)));
11630 void
11631 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11633 rtx low[2], high[2];
11634 int count;
11635 const int single_width = mode == DImode ? 32 : 64;
11637 if (GET_CODE (operands[2]) == CONST_INT)
11639 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11640 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11642 if (count >= single_width)
11644 emit_move_insn (high[0], low[1]);
11645 emit_move_insn (low[0], const0_rtx);
11647 if (count > single_width)
11648 ix86_expand_ashl_const (high[0], count - single_width, mode);
11650 else
11652 if (!rtx_equal_p (operands[0], operands[1]))
11653 emit_move_insn (operands[0], operands[1]);
11654 emit_insn ((mode == DImode
11655 ? gen_x86_shld_1
11656 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11657 ix86_expand_ashl_const (low[0], count, mode);
11659 return;
11662 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11664 if (operands[1] == const1_rtx)
11666 /* Assuming we've chosen QImode-capable registers, then 1 << N
11667 can be done with two 32/64-bit shifts, no branches, no cmoves. */
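/* Either way, low/high are first set to 1/0 or 0/1 according to bit 5
   (or 6) of the shift count, and then both halves are shifted by the
   full count; since the hardware masks the shift count, only the
   selected half ends up nonzero.  */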
11668 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11670 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11672 ix86_expand_clear (low[0]);
11673 ix86_expand_clear (high[0]);
11674 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11676 d = gen_lowpart (QImode, low[0]);
11677 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11678 s = gen_rtx_EQ (QImode, flags, const0_rtx);
11679 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11681 d = gen_lowpart (QImode, high[0]);
11682 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11683 s = gen_rtx_NE (QImode, flags, const0_rtx);
11684 emit_insn (gen_rtx_SET (VOIDmode, d, s));
11687 /* Otherwise, we can get the same results by manually performing
11688 a bit extract operation on bit 5/6, and then performing the two
11689 shifts. The two methods of getting 0/1 into low/high are exactly
11690 the same size. Avoiding the shift in the bit extract case helps
11691 pentium4 a bit; no one else seems to care much either way. */
11692 else
11694 rtx x;
11696 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11697 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
11698 else
11699 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
11700 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11702 emit_insn ((mode == DImode
11703 ? gen_lshrsi3
11704 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
11705 emit_insn ((mode == DImode
11706 ? gen_andsi3
11707 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
11708 emit_move_insn (low[0], high[0]);
11709 emit_insn ((mode == DImode
11710 ? gen_xorsi3
11711 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
11714 emit_insn ((mode == DImode
11715 ? gen_ashlsi3
11716 : gen_ashldi3) (low[0], low[0], operands[2]));
11717 emit_insn ((mode == DImode
11718 ? gen_ashlsi3
11719 : gen_ashldi3) (high[0], high[0], operands[2]));
11720 return;
11723 if (operands[1] == constm1_rtx)
11725 /* For -1 << N, we can avoid the shld instruction, because we
11726 know that we're shifting 0...31/63 ones into a -1. */
11727 emit_move_insn (low[0], constm1_rtx);
11728 if (optimize_size)
11729 emit_move_insn (high[0], low[0]);
11730 else
11731 emit_move_insn (high[0], constm1_rtx);
11733 else
11735 if (!rtx_equal_p (operands[0], operands[1]))
11736 emit_move_insn (operands[0], operands[1]);
11738 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11739 emit_insn ((mode == DImode
11740 ? gen_x86_shld_1
11741 : gen_x86_64_shld) (high[0], low[0], operands[2]));
11744 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
11746 if (TARGET_CMOVE && scratch)
11748 ix86_expand_clear (scratch);
11749 emit_insn ((mode == DImode
11750 ? gen_x86_shift_adj_1
11751 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
11753 else
11754 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11757 void
11758 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
11760 rtx low[2], high[2];
11761 int count;
11762 const int single_width = mode == DImode ? 32 : 64;
11764 if (GET_CODE (operands[2]) == CONST_INT)
11766 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11767 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11769 if (count == single_width * 2 - 1)
11771 emit_move_insn (high[0], high[1]);
11772 emit_insn ((mode == DImode
11773 ? gen_ashrsi3
11774 : gen_ashrdi3) (high[0], high[0],
11775 GEN_INT (single_width - 1)));
11776 emit_move_insn (low[0], high[0]);
11779 else if (count >= single_width)
11781 emit_move_insn (low[0], high[1]);
11782 emit_move_insn (high[0], low[0]);
11783 emit_insn ((mode == DImode
11784 ? gen_ashrsi3
11785 : gen_ashrdi3) (high[0], high[0],
11786 GEN_INT (single_width - 1)));
11787 if (count > single_width)
11788 emit_insn ((mode == DImode
11789 ? gen_ashrsi3
11790 : gen_ashrdi3) (low[0], low[0],
11791 GEN_INT (count - single_width)));
11793 else
11795 if (!rtx_equal_p (operands[0], operands[1]))
11796 emit_move_insn (operands[0], operands[1]);
11797 emit_insn ((mode == DImode
11798 ? gen_x86_shrd_1
11799 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11800 emit_insn ((mode == DImode
11801 ? gen_ashrsi3
11802 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
11805 else
11807 if (!rtx_equal_p (operands[0], operands[1]))
11808 emit_move_insn (operands[0], operands[1]);
11810 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11812 emit_insn ((mode == DImode
11813 ? gen_x86_shrd_1
11814 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11815 emit_insn ((mode == DImode
11816 ? gen_ashrsi3
11817 : gen_ashrdi3) (high[0], high[0], operands[2]));
11819 if (TARGET_CMOVE && scratch)
11821 emit_move_insn (scratch, high[0]);
11822 emit_insn ((mode == DImode
11823 ? gen_ashrsi3
11824 : gen_ashrdi3) (scratch, scratch,
11825 GEN_INT (single_width - 1)));
11826 emit_insn ((mode == DImode
11827 ? gen_x86_shift_adj_1
11828 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11829 scratch));
11831 else
11832 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11836 void
11837 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
11839 rtx low[2], high[2];
11840 int count;
11841 const int single_width = mode == DImode ? 32 : 64;
11843 if (GET_CODE (operands[2]) == CONST_INT)
11845 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11846 count = INTVAL (operands[2]) & (single_width * 2 - 1);
11848 if (count >= single_width)
11850 emit_move_insn (low[0], high[1]);
11851 ix86_expand_clear (high[0]);
11853 if (count > single_width)
11854 emit_insn ((mode == DImode
11855 ? gen_lshrsi3
11856 : gen_lshrdi3) (low[0], low[0],
11857 GEN_INT (count - single_width)));
11859 else
11861 if (!rtx_equal_p (operands[0], operands[1]))
11862 emit_move_insn (operands[0], operands[1]);
11863 emit_insn ((mode == DImode
11864 ? gen_x86_shrd_1
11865 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11866 emit_insn ((mode == DImode
11867 ? gen_lshrsi3
11868 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
11871 else
11873 if (!rtx_equal_p (operands[0], operands[1]))
11874 emit_move_insn (operands[0], operands[1]);
11876 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11878 emit_insn ((mode == DImode
11879 ? gen_x86_shrd_1
11880 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11881 emit_insn ((mode == DImode
11882 ? gen_lshrsi3
11883 : gen_lshrdi3) (high[0], high[0], operands[2]));
11885 /* Heh. By reversing the arguments, we can reuse this pattern. */
11886 if (TARGET_CMOVE && scratch)
11888 ix86_expand_clear (scratch);
11889 emit_insn ((mode == DImode
11890 ? gen_x86_shift_adj_1
11891 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11892 scratch));
11894 else
11895 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11899 /* Helper function for the string operations below.  Emit code testing whether
11900    VARIABLE is aligned to VALUE bytes; if it is, jump to the returned label.  */
11901 static rtx
11902 ix86_expand_aligntest (rtx variable, int value)
11904 rtx label = gen_label_rtx ();
11905 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11906 if (GET_MODE (variable) == DImode)
11907 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11908 else
11909 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11910 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11911 1, label);
11912 return label;
11915 /* Adjust COUNTER by the VALUE. */
11916 static void
11917 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11919 if (GET_MODE (countreg) == DImode)
11920 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11921 else
11922 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11925 /* Zero extend possibly SImode EXP to Pmode register. */
11927 ix86_zero_extend_to_Pmode (rtx exp)
11929 rtx r;
11930 if (GET_MODE (exp) == VOIDmode)
11931 return force_reg (Pmode, exp);
11932 if (GET_MODE (exp) == Pmode)
11933 return copy_to_mode_reg (Pmode, exp);
11934 r = gen_reg_rtx (Pmode);
11935 emit_insn (gen_zero_extendsidi2 (r, exp));
11936 return r;
11939 /* Expand string move (memcpy) operation. Use i386 string operations when
11940 profitable. expand_clrmem contains similar code. */
11942 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11944 rtx srcreg, destreg, countreg, srcexp, destexp;
11945 enum machine_mode counter_mode;
11946 HOST_WIDE_INT align = 0;
11947 unsigned HOST_WIDE_INT count = 0;
11949 if (GET_CODE (align_exp) == CONST_INT)
11950 align = INTVAL (align_exp);
11952 /* Can't use any of this if the user has appropriated esi or edi. */
11953 if (global_regs[4] || global_regs[5])
11954 return 0;
11956 /* This simple hack avoids all inlining code and simplifies code below. */
11957 if (!TARGET_ALIGN_STRINGOPS)
11958 align = 64;
11960 if (GET_CODE (count_exp) == CONST_INT)
11962 count = INTVAL (count_exp);
11963 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11964 return 0;
11967 /* Figure out proper mode for counter. For 32bits it is always SImode,
11968 for 64bits use SImode when possible, otherwise DImode.
11969 Set count to number of bytes copied when known at compile time. */
11970 if (!TARGET_64BIT
11971 || GET_MODE (count_exp) == SImode
11972 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11973 counter_mode = SImode;
11974 else
11975 counter_mode = DImode;
11977 gcc_assert (counter_mode == SImode || counter_mode == DImode);
11979 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11980 if (destreg != XEXP (dst, 0))
11981 dst = replace_equiv_address_nv (dst, destreg);
11982 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11983 if (srcreg != XEXP (src, 0))
11984 src = replace_equiv_address_nv (src, srcreg);
11986 /* When optimizing for size emit simple rep ; movsb instruction for
11987 counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
11988 sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
11990 Size of (movsl;)*(movsw;)?(movsb;)? sequence is
11990 count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
11991 but we don't know whether upper 24 (resp. 56) bits of %ecx will be
11992 known to be zero or not. The rep; movsb sequence causes higher
11993 register pressure though, so take that into account. */
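   /* A worked instance of that size comparison (illustration only): for
      count == 11 under -Os the unrolled form is movsl; movsl; movsw; movsb,
      i.e. count / 4 + (count & 3) = 2 + 3 = 5 bytes, which beats the 4..7
      byte mov $11, %ecx; rep; movsb form, so the test below is false and
      the expander falls through to the unrolled copy instead.  */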
11995 if ((!optimize || optimize_size)
11996 && (count == 0
11997 || ((count & 0x03)
11998 && (!optimize_size
11999 || count > 5 * 4
12000 || (count & 3) + count / 4 > 6))))
12002 emit_insn (gen_cld ());
12003 countreg = ix86_zero_extend_to_Pmode (count_exp);
12004 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12005 srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12006 emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12007 destexp, srcexp));
12010 /* For constant aligned (or small unaligned) copies use rep movsl
12011 followed by code copying the rest. For PentiumPro ensure 8 byte
12012 alignment to allow rep movsl acceleration. */
12014 else if (count != 0
12015 && (align >= 8
12016 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12017 || optimize_size || count < (unsigned int) 64))
12019 unsigned HOST_WIDE_INT offset = 0;
12020 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12021 rtx srcmem, dstmem;
12023 emit_insn (gen_cld ());
12024 if (count & ~(size - 1))
12026 if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12028 enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12030 while (offset < (count & ~(size - 1)))
12032 srcmem = adjust_automodify_address_nv (src, movs_mode,
12033 srcreg, offset);
12034 dstmem = adjust_automodify_address_nv (dst, movs_mode,
12035 destreg, offset);
12036 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12037 offset += size;
12040 else
12042 countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12043 & (TARGET_64BIT ? -1 : 0x3fffffff));
12044 countreg = copy_to_mode_reg (counter_mode, countreg);
12045 countreg = ix86_zero_extend_to_Pmode (countreg);
12047 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12048 GEN_INT (size == 4 ? 2 : 3));
12049 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12050 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12052 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12053 countreg, destexp, srcexp));
12054 offset = count & ~(size - 1);
12057 if (size == 8 && (count & 0x04))
12059 srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12060 offset);
12061 dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12062 offset);
12063 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12064 offset += 4;
12066 if (count & 0x02)
12068 srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12069 offset);
12070 dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12071 offset);
12072 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12073 offset += 2;
12075 if (count & 0x01)
12077 srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12078 offset);
12079 dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12080 offset);
12081 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12084 /* The generic code based on the glibc implementation:
12085 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12086 allowing accelerated copying there)
12087 - copy the data using rep movsl
12088 - copy the rest. */
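   /* A rough C-level sketch of that strategy (illustration only; the
      expander below emits the equivalent RTL directly and also covers the
      8-byte PentiumPro and 64-bit variants):

	while (count && ((unsigned long) dst & 3))
	  { *dst++ = *src++; count--; }          align the destination
	rep; movsl for count >> 2 longwords      bulk copy
	copy the remaining count & 3 bytes one at a time  */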
12089 else
12091 rtx countreg2;
12092 rtx label = NULL;
12093 rtx srcmem, dstmem;
12094 int desired_alignment = (TARGET_PENTIUMPRO
12095 && (count == 0 || count >= (unsigned int) 260)
12096 ? 8 : UNITS_PER_WORD);
12097 /* Get rid of MEM_OFFSETs, they won't be accurate. */
12098 dst = change_address (dst, BLKmode, destreg);
12099 src = change_address (src, BLKmode, srcreg);
12101 /* In case we don't know anything about the alignment, default to
12102 library version, since it is usually equally fast and results in
12103 shorter code.
12105 Also emit call when we know that the count is large and call overhead
12106 will not be important. */
12107 if (!TARGET_INLINE_ALL_STRINGOPS
12108 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12109 return 0;
12111 if (TARGET_SINGLE_STRINGOP)
12112 emit_insn (gen_cld ());
12114 countreg2 = gen_reg_rtx (Pmode);
12115 countreg = copy_to_mode_reg (counter_mode, count_exp);
12117 /* We don't use loops to align destination and to copy parts smaller
12118 than 4 bytes, because gcc is able to optimize such code better (in
12119 the case the destination or the count really is aligned, gcc is often
12120 able to predict the branches) and also it is friendlier to the
12121 hardware branch prediction.
12123 Using loops is beneficial for the generic case, because we can
12124 handle small counts using the loops. Many CPUs (such as Athlon)
12125 have large REP prefix setup costs.
12127 This is quite costly. Maybe we can revisit this decision later or
12128 add some customizability to this code. */
12130 if (count == 0 && align < desired_alignment)
12132 label = gen_label_rtx ();
12133 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12134 LEU, 0, counter_mode, 1, label);
12136 if (align <= 1)
12138 rtx label = ix86_expand_aligntest (destreg, 1);
12139 srcmem = change_address (src, QImode, srcreg);
12140 dstmem = change_address (dst, QImode, destreg);
12141 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12142 ix86_adjust_counter (countreg, 1);
12143 emit_label (label);
12144 LABEL_NUSES (label) = 1;
12146 if (align <= 2)
12148 rtx label = ix86_expand_aligntest (destreg, 2);
12149 srcmem = change_address (src, HImode, srcreg);
12150 dstmem = change_address (dst, HImode, destreg);
12151 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12152 ix86_adjust_counter (countreg, 2);
12153 emit_label (label);
12154 LABEL_NUSES (label) = 1;
12156 if (align <= 4 && desired_alignment > 4)
12158 rtx label = ix86_expand_aligntest (destreg, 4);
12159 srcmem = change_address (src, SImode, srcreg);
12160 dstmem = change_address (dst, SImode, destreg);
12161 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12162 ix86_adjust_counter (countreg, 4);
12163 emit_label (label);
12164 LABEL_NUSES (label) = 1;
12167 if (label && desired_alignment > 4 && !TARGET_64BIT)
12169 emit_label (label);
12170 LABEL_NUSES (label) = 1;
12171 label = NULL_RTX;
12173 if (!TARGET_SINGLE_STRINGOP)
12174 emit_insn (gen_cld ());
12175 if (TARGET_64BIT)
12177 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12178 GEN_INT (3)));
12179 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12181 else
12183 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12184 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12186 srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12187 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12188 emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12189 countreg2, destexp, srcexp));
12191 if (label)
12193 emit_label (label);
12194 LABEL_NUSES (label) = 1;
12196 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12198 srcmem = change_address (src, SImode, srcreg);
12199 dstmem = change_address (dst, SImode, destreg);
12200 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12202 if ((align <= 4 || count == 0) && TARGET_64BIT)
12204 rtx label = ix86_expand_aligntest (countreg, 4);
12205 srcmem = change_address (src, SImode, srcreg);
12206 dstmem = change_address (dst, SImode, destreg);
12207 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12208 emit_label (label);
12209 LABEL_NUSES (label) = 1;
12211 if (align > 2 && count != 0 && (count & 2))
12213 srcmem = change_address (src, HImode, srcreg);
12214 dstmem = change_address (dst, HImode, destreg);
12215 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12217 if (align <= 2 || count == 0)
12219 rtx label = ix86_expand_aligntest (countreg, 2);
12220 srcmem = change_address (src, HImode, srcreg);
12221 dstmem = change_address (dst, HImode, destreg);
12222 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12223 emit_label (label);
12224 LABEL_NUSES (label) = 1;
12226 if (align > 1 && count != 0 && (count & 1))
12228 srcmem = change_address (src, QImode, srcreg);
12229 dstmem = change_address (dst, QImode, destreg);
12230 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12232 if (align <= 1 || count == 0)
12234 rtx label = ix86_expand_aligntest (countreg, 1);
12235 srcmem = change_address (src, QImode, srcreg);
12236 dstmem = change_address (dst, QImode, destreg);
12237 emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12238 emit_label (label);
12239 LABEL_NUSES (label) = 1;
12243 return 1;
12246 /* Expand string clear operation (bzero). Use i386 string operations when
12247 profitable. expand_movmem contains similar code. */
12249 ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12251 rtx destreg, zeroreg, countreg, destexp;
12252 enum machine_mode counter_mode;
12253 HOST_WIDE_INT align = 0;
12254 unsigned HOST_WIDE_INT count = 0;
12256 if (GET_CODE (align_exp) == CONST_INT)
12257 align = INTVAL (align_exp);
12259 /* Can't use any of this if the user has appropriated esi. */
12260 if (global_regs[4])
12261 return 0;
12263 /* This simple hack avoids all inlining code and simplifies code below. */
12264 if (!TARGET_ALIGN_STRINGOPS)
12265 align = 32;
12267 if (GET_CODE (count_exp) == CONST_INT)
12269 count = INTVAL (count_exp);
12270 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12271 return 0;
12273 /* Figure out proper mode for counter. For 32bits it is always SImode,
12274 for 64bits use SImode when possible, otherwise DImode.
12275 Set count to number of bytes copied when known at compile time. */
12276 if (!TARGET_64BIT
12277 || GET_MODE (count_exp) == SImode
12278 || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12279 counter_mode = SImode;
12280 else
12281 counter_mode = DImode;
12283 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12284 if (destreg != XEXP (dst, 0))
12285 dst = replace_equiv_address_nv (dst, destreg);
12288 /* When optimizing for size emit a simple rep ; stosb instruction for
12289 counts not divisible by 4. The movl $N, %ecx; rep; stosb
12290 sequence is 7 bytes long, so if optimizing for size and count is
12291 small enough that some stosl, stosw and stosb instructions without
12292 rep are shorter, fall back into the next if. */
12294 if ((!optimize || optimize_size)
12295 && (count == 0
12296 || ((count & 0x03)
12297 && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12299 emit_insn (gen_cld ());
12301 countreg = ix86_zero_extend_to_Pmode (count_exp);
12302 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12303 destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12304 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12306 else if (count != 0
12307 && (align >= 8
12308 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12309 || optimize_size || count < (unsigned int) 64))
12311 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12312 unsigned HOST_WIDE_INT offset = 0;
12314 emit_insn (gen_cld ());
12316 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12317 if (count & ~(size - 1))
12319 unsigned HOST_WIDE_INT repcount;
12320 unsigned int max_nonrep;
12322 repcount = count >> (size == 4 ? 2 : 3);
12323 if (!TARGET_64BIT)
12324 repcount &= 0x3fffffff;
12326 /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12327 movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12328 bytes. In both cases the latter seems to be faster for small
12329 values of N. */
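	  /* Illustrative arithmetic behind max_nonrep (not from the original
	     source): stosl is one byte, so up to seven inline stosl beat the
	     7-byte movl $N, %ecx; rep; stosl; with stosq (two bytes) the
	     break-even is four.  When not optimizing for size, Pentium 4 and
	     Nocona are capped at three because their rep setup cost is high.  */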
12330 max_nonrep = size == 4 ? 7 : 4;
12331 if (!optimize_size)
12332 switch (ix86_tune)
12334 case PROCESSOR_PENTIUM4:
12335 case PROCESSOR_NOCONA:
12336 max_nonrep = 3;
12337 break;
12338 default:
12339 break;
12342 if (repcount <= max_nonrep)
12343 while (repcount-- > 0)
12345 rtx mem = adjust_automodify_address_nv (dst,
12346 GET_MODE (zeroreg),
12347 destreg, offset);
12348 emit_insn (gen_strset (destreg, mem, zeroreg));
12349 offset += size;
12351 else
12353 countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12354 countreg = ix86_zero_extend_to_Pmode (countreg);
12355 destexp = gen_rtx_ASHIFT (Pmode, countreg,
12356 GEN_INT (size == 4 ? 2 : 3));
12357 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12358 emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12359 destexp));
12360 offset = count & ~(size - 1);
12363 if (size == 8 && (count & 0x04))
12365 rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12366 offset);
12367 emit_insn (gen_strset (destreg, mem,
12368 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12369 offset += 4;
12371 if (count & 0x02)
12373 rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12374 offset);
12375 emit_insn (gen_strset (destreg, mem,
12376 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12377 offset += 2;
12379 if (count & 0x01)
12381 rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12382 offset);
12383 emit_insn (gen_strset (destreg, mem,
12384 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12387 else
12389 rtx countreg2;
12390 rtx label = NULL;
12391 /* Compute desired alignment of the string operation. */
12392 int desired_alignment = (TARGET_PENTIUMPRO
12393 && (count == 0 || count >= (unsigned int) 260)
12394 ? 8 : UNITS_PER_WORD);
12396 /* In case we don't know anything about the alignment, default to
12397 library version, since it is usually equally fast and results in
12398 shorter code.
12400 Also emit call when we know that the count is large and call overhead
12401 will not be important. */
12402 if (!TARGET_INLINE_ALL_STRINGOPS
12403 && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12404 return 0;
12406 if (TARGET_SINGLE_STRINGOP)
12407 emit_insn (gen_cld ());
12409 countreg2 = gen_reg_rtx (Pmode);
12410 countreg = copy_to_mode_reg (counter_mode, count_exp);
12411 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12412 /* Get rid of MEM_OFFSET, it won't be accurate. */
12413 dst = change_address (dst, BLKmode, destreg);
12415 if (count == 0 && align < desired_alignment)
12417 label = gen_label_rtx ();
12418 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12419 LEU, 0, counter_mode, 1, label);
12421 if (align <= 1)
12423 rtx label = ix86_expand_aligntest (destreg, 1);
12424 emit_insn (gen_strset (destreg, dst,
12425 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12426 ix86_adjust_counter (countreg, 1);
12427 emit_label (label);
12428 LABEL_NUSES (label) = 1;
12430 if (align <= 2)
12432 rtx label = ix86_expand_aligntest (destreg, 2);
12433 emit_insn (gen_strset (destreg, dst,
12434 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12435 ix86_adjust_counter (countreg, 2);
12436 emit_label (label);
12437 LABEL_NUSES (label) = 1;
12439 if (align <= 4 && desired_alignment > 4)
12441 rtx label = ix86_expand_aligntest (destreg, 4);
12442 emit_insn (gen_strset (destreg, dst,
12443 (TARGET_64BIT
12444 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12445 : zeroreg)));
12446 ix86_adjust_counter (countreg, 4);
12447 emit_label (label);
12448 LABEL_NUSES (label) = 1;
12451 if (label && desired_alignment > 4 && !TARGET_64BIT)
12453 emit_label (label);
12454 LABEL_NUSES (label) = 1;
12455 label = NULL_RTX;
12458 if (!TARGET_SINGLE_STRINGOP)
12459 emit_insn (gen_cld ());
12460 if (TARGET_64BIT)
12462 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12463 GEN_INT (3)));
12464 destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12466 else
12468 emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12469 destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12471 destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12472 emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12474 if (label)
12476 emit_label (label);
12477 LABEL_NUSES (label) = 1;
12480 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12481 emit_insn (gen_strset (destreg, dst,
12482 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12483 if (TARGET_64BIT && (align <= 4 || count == 0))
12485 rtx label = ix86_expand_aligntest (countreg, 4);
12486 emit_insn (gen_strset (destreg, dst,
12487 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12488 emit_label (label);
12489 LABEL_NUSES (label) = 1;
12491 if (align > 2 && count != 0 && (count & 2))
12492 emit_insn (gen_strset (destreg, dst,
12493 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12494 if (align <= 2 || count == 0)
12496 rtx label = ix86_expand_aligntest (countreg, 2);
12497 emit_insn (gen_strset (destreg, dst,
12498 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12499 emit_label (label);
12500 LABEL_NUSES (label) = 1;
12502 if (align > 1 && count != 0 && (count & 1))
12503 emit_insn (gen_strset (destreg, dst,
12504 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12505 if (align <= 1 || count == 0)
12507 rtx label = ix86_expand_aligntest (countreg, 1);
12508 emit_insn (gen_strset (destreg, dst,
12509 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12510 emit_label (label);
12511 LABEL_NUSES (label) = 1;
12514 return 1;
12517 /* Expand strlen. */
12519 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12521 rtx addr, scratch1, scratch2, scratch3, scratch4;
12523 /* The generic case of the strlen expander is long.  Avoid expanding it
12524    unless TARGET_INLINE_ALL_STRINGOPS.  */
12526 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12527 && !TARGET_INLINE_ALL_STRINGOPS
12528 && !optimize_size
12529 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12530 return 0;
12532 addr = force_reg (Pmode, XEXP (src, 0));
12533 scratch1 = gen_reg_rtx (Pmode);
12535 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12536 && !optimize_size)
12538 /* Well it seems that some optimizer does not combine a call like
12539 foo(strlen(bar), strlen(bar));
12540 when the move and the subtraction are done here.  It does calculate
12541 the length just once when these instructions are done inside of
12542 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
12543 often used and I use one fewer register for the lifetime of
12544 output_strlen_unroll() this is better. */
12546 emit_move_insn (out, addr);
12548 ix86_expand_strlensi_unroll_1 (out, src, align);
12550 /* strlensi_unroll_1 returns the address of the zero at the end of
12551 the string, like memchr(), so compute the length by subtracting
12552 the start address. */
12553 if (TARGET_64BIT)
12554 emit_insn (gen_subdi3 (out, out, addr));
12555 else
12556 emit_insn (gen_subsi3 (out, out, addr));
12558 else
12560 rtx unspec;
12561 scratch2 = gen_reg_rtx (Pmode);
12562 scratch3 = gen_reg_rtx (Pmode);
12563 scratch4 = force_reg (Pmode, constm1_rtx);
12565 emit_move_insn (scratch3, addr);
12566 eoschar = force_reg (QImode, eoschar);
12568 emit_insn (gen_cld ());
12569 src = replace_equiv_address_nv (src, scratch3);
12571 /* If .md starts supporting :P, this can be done in .md. */
12572 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12573 scratch4), UNSPEC_SCAS);
12574 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
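      /* After the repnz; scasb emitted above, the count register (which
	 started at -1) has been decremented once per byte scanned,
	 including the terminating zero, so for a string of length n it
	 holds -(n + 2).  A scalar sketch of the recovery done below
	 (illustration only, not part of the original source):

	   count = -1 - (n + 1);       what scasb leaves behind
	   len   = ~count + (-1);      == n

	 hence the one_cmpl followed by the add of -1.  */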
12575 if (TARGET_64BIT)
12577 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12578 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12580 else
12582 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12583 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12586 return 1;
12589 /* Expand the appropriate insns for doing strlen if not just doing
12590 repnz; scasb
12592 out = result, initialized with the start address
12593 align_rtx = alignment of the address.
12594 scratch = scratch register, initialized with the start address when
12595 not aligned, otherwise undefined
12597 This is just the body. It needs the initializations mentioned above and
12598 some address computing at the end. These things are done in i386.md. */
12600 static void
12601 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12603 int align;
12604 rtx tmp;
12605 rtx align_2_label = NULL_RTX;
12606 rtx align_3_label = NULL_RTX;
12607 rtx align_4_label = gen_label_rtx ();
12608 rtx end_0_label = gen_label_rtx ();
12609 rtx mem;
12610 rtx tmpreg = gen_reg_rtx (SImode);
12611 rtx scratch = gen_reg_rtx (SImode);
12612 rtx cmp;
12614 align = 0;
12615 if (GET_CODE (align_rtx) == CONST_INT)
12616 align = INTVAL (align_rtx);
12618 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
12620 /* Is there a known alignment and is it less than 4? */
12621 if (align < 4)
12623 rtx scratch1 = gen_reg_rtx (Pmode);
12624 emit_move_insn (scratch1, out);
12625 /* Is there a known alignment and is it not 2? */
12626 if (align != 2)
12628 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12629 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12631 /* Leave just the 3 lower bits. */
12632 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12633 NULL_RTX, 0, OPTAB_WIDEN);
12635 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12636 Pmode, 1, align_4_label);
12637 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12638 Pmode, 1, align_2_label);
12639 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12640 Pmode, 1, align_3_label);
12642 else
12644 /* Since the alignment is 2, the address is off by either 0 or 2 bytes;
12645    check whether it is already aligned to a 4-byte boundary.  */
12647 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12648 NULL_RTX, 0, OPTAB_WIDEN);
12650 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12651 Pmode, 1, align_4_label);
12654 mem = change_address (src, QImode, out);
12656 /* Now compare the bytes. */
12658 /* Compare the first n unaligned byte on a byte per byte basis. */
12659 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12660 QImode, 1, end_0_label);
12662 /* Increment the address. */
12663 if (TARGET_64BIT)
12664 emit_insn (gen_adddi3 (out, out, const1_rtx));
12665 else
12666 emit_insn (gen_addsi3 (out, out, const1_rtx));
12668 /* Not needed with an alignment of 2 */
12669 if (align != 2)
12671 emit_label (align_2_label);
12673 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12674 end_0_label);
12676 if (TARGET_64BIT)
12677 emit_insn (gen_adddi3 (out, out, const1_rtx));
12678 else
12679 emit_insn (gen_addsi3 (out, out, const1_rtx));
12681 emit_label (align_3_label);
12684 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12685 end_0_label);
12687 if (TARGET_64BIT)
12688 emit_insn (gen_adddi3 (out, out, const1_rtx));
12689 else
12690 emit_insn (gen_addsi3 (out, out, const1_rtx));
12693 /* Generate loop to check 4 bytes at a time. It is not a good idea to
12694 align this loop.  It only makes programs larger and does not help
12695    speed.  */
12696 emit_label (align_4_label);
12698 mem = change_address (src, SImode, out);
12699 emit_move_insn (scratch, mem);
12700 if (TARGET_64BIT)
12701 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12702 else
12703 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12705 /* This formula yields a nonzero result iff one of the bytes is zero.
12706 This saves three branches inside the loop and many cycles.  */
12708 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12709 emit_insn (gen_one_cmplsi2 (scratch, scratch));
12710 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12711 emit_insn (gen_andsi3 (tmpreg, tmpreg,
12712 gen_int_mode (0x80808080, SImode)));
12713 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12714 align_4_label);
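  /* A minimal stand-alone sketch of that formula, assuming 32-bit
     unsigned arithmetic (illustration only, not used by the compiler):

       static int word_has_zero_byte (unsigned int x)
       {
	 return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
       }

     e.g. x = 0x00616263 yields 0x80000000 (nonzero, the top byte is zero)
     while x = 0x61626364 yields 0.  */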
12716 if (TARGET_CMOVE)
12718 rtx reg = gen_reg_rtx (SImode);
12719 rtx reg2 = gen_reg_rtx (Pmode);
12720 emit_move_insn (reg, tmpreg);
12721 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12723 /* If zero is not in the first two bytes, move two bytes forward. */
12724 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12725 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12726 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12727 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12728 gen_rtx_IF_THEN_ELSE (SImode, tmp,
12729 reg,
12730 tmpreg)));
12731 /* Emit lea manually to avoid clobbering of flags. */
12732 emit_insn (gen_rtx_SET (SImode, reg2,
12733 gen_rtx_PLUS (Pmode, out, const2_rtx)));
12735 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12736 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12737 emit_insn (gen_rtx_SET (VOIDmode, out,
12738 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12739 reg2,
12740 out)));
12743 else
12745 rtx end_2_label = gen_label_rtx ();
12746 /* Is zero in the first two bytes? */
12748 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12749 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12750 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12751 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12752 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12753 pc_rtx);
12754 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12755 JUMP_LABEL (tmp) = end_2_label;
12757 /* Not in the first two. Move two bytes forward. */
12758 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12759 if (TARGET_64BIT)
12760 emit_insn (gen_adddi3 (out, out, const2_rtx));
12761 else
12762 emit_insn (gen_addsi3 (out, out, const2_rtx));
12764 emit_label (end_2_label);
12768 /* Avoid a branch when adjusting for the zero byte's position.  */
12769 tmpreg = gen_lowpart (QImode, tmpreg);
12770 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12771 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
12772 if (TARGET_64BIT)
12773 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12774 else
12775 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12777 emit_label (end_0_label);
12780 void
12781 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12782 rtx callarg2 ATTRIBUTE_UNUSED,
12783 rtx pop, int sibcall)
12785 rtx use = NULL, call;
12787 if (pop == const0_rtx)
12788 pop = NULL;
12789 gcc_assert (!TARGET_64BIT || !pop);
12791 #if TARGET_MACHO
12792 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12793 fnaddr = machopic_indirect_call_target (fnaddr);
12794 #else
12795 /* Static functions and indirect calls don't need the pic register. */
12796 if (! TARGET_64BIT && flag_pic
12797 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12798 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12799 use_reg (&use, pic_offset_table_rtx);
12801 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12803 rtx al = gen_rtx_REG (QImode, 0);
12804 emit_move_insn (al, callarg2);
12805 use_reg (&use, al);
12807 #endif /* TARGET_MACHO */
12809 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12811 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12812 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12814 if (sibcall && TARGET_64BIT
12815 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12817 rtx addr;
12818 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12819 fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12820 emit_move_insn (fnaddr, addr);
12821 fnaddr = gen_rtx_MEM (QImode, fnaddr);
12824 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12825 if (retval)
12826 call = gen_rtx_SET (VOIDmode, retval, call);
12827 if (pop)
12829 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12830 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12831 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12834 call = emit_call_insn (call);
12835 if (use)
12836 CALL_INSN_FUNCTION_USAGE (call) = use;
12840 /* Clear stack slot assignments remembered from previous functions.
12841 This is called from INIT_EXPANDERS once before RTL is emitted for each
12842 function. */
12844 static struct machine_function *
12845 ix86_init_machine_status (void)
12847 struct machine_function *f;
12849 f = ggc_alloc_cleared (sizeof (struct machine_function));
12850 f->use_fast_prologue_epilogue_nregs = -1;
12852 return f;
12855 /* Return a MEM corresponding to a stack slot with mode MODE.
12856 Allocate a new slot if necessary.
12858 The RTL for a function can have several slots available: N is
12859 which slot to use. */
12862 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
12864 struct stack_local_entry *s;
12866 gcc_assert (n < MAX_386_STACK_LOCALS);
12868 /* Virtual slot is valid only before vregs are instantiated. */
12869 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
12871 for (s = ix86_stack_locals; s; s = s->next)
12872 if (s->mode == mode && s->n == n)
12873 return s->rtl;
12875 s = (struct stack_local_entry *)
12876 ggc_alloc (sizeof (struct stack_local_entry));
12877 s->n = n;
12878 s->mode = mode;
12879 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12881 s->next = ix86_stack_locals;
12882 ix86_stack_locals = s;
12883 return s->rtl;
12886 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12888 static GTY(()) rtx ix86_tls_symbol;
12890 ix86_tls_get_addr (void)
12893 if (!ix86_tls_symbol)
12895 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12896 (TARGET_GNU_TLS && !TARGET_64BIT)
12897 ? "___tls_get_addr"
12898 : "__tls_get_addr");
12901 return ix86_tls_symbol;
12904 /* Calculate the length of the memory address in the instruction
12905 encoding. Does not include the one-byte modrm, opcode, or prefix. */
12908 memory_address_length (rtx addr)
12910 struct ix86_address parts;
12911 rtx base, index, disp;
12912 int len;
12913 int ok;
12915 if (GET_CODE (addr) == PRE_DEC
12916 || GET_CODE (addr) == POST_INC
12917 || GET_CODE (addr) == PRE_MODIFY
12918 || GET_CODE (addr) == POST_MODIFY)
12919 return 0;
12921 ok = ix86_decompose_address (addr, &parts);
12922 gcc_assert (ok);
12924 if (parts.base && GET_CODE (parts.base) == SUBREG)
12925 parts.base = SUBREG_REG (parts.base);
12926 if (parts.index && GET_CODE (parts.index) == SUBREG)
12927 parts.index = SUBREG_REG (parts.index);
12929 base = parts.base;
12930 index = parts.index;
12931 disp = parts.disp;
12932 len = 0;
12934 /* Rule of thumb:
12935 - esp as the base always wants an index,
12936 - ebp as the base always wants a displacement. */
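  /* A few worked examples of the lengths computed below, based on the
     32-bit encoding rules (illustration, not part of the original text):
       (%eax)          -> 0   plain modrm, counted by the caller
       (%esp)          -> 1   needs a SIB byte
       (%ebp)          -> 1   needs a zero disp8
       4(%eax)         -> 1   disp8
       symbol          -> 4   disp32 with no base or index
       4(%eax,%ebx)    -> 2   SIB plus disp8  */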
12938 /* Register Indirect. */
12939 if (base && !index && !disp)
12941 /* esp (for its index) and ebp (for its displacement) need
12942 the two-byte modrm form. */
12943 if (addr == stack_pointer_rtx
12944 || addr == arg_pointer_rtx
12945 || addr == frame_pointer_rtx
12946 || addr == hard_frame_pointer_rtx)
12947 len = 1;
12950 /* Direct Addressing. */
12951 else if (disp && !base && !index)
12952 len = 4;
12954 else
12956 /* Find the length of the displacement constant. */
12957 if (disp)
12959 if (GET_CODE (disp) == CONST_INT
12960 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12961 && base)
12962 len = 1;
12963 else
12964 len = 4;
12966 /* ebp always wants a displacement. */
12967 else if (base == hard_frame_pointer_rtx)
12968 len = 1;
12970 /* An index requires the two-byte modrm form.... */
12971 if (index
12972 /* ...like esp, which always wants an index. */
12973 || base == stack_pointer_rtx
12974 || base == arg_pointer_rtx
12975 || base == frame_pointer_rtx)
12976 len += 1;
12979 return len;
12982 /* Compute default value for "length_immediate" attribute. When SHORTFORM
12983 is set, expect that the insn has an 8-bit immediate alternative.  */
12985 ix86_attr_length_immediate_default (rtx insn, int shortform)
12987 int len = 0;
12988 int i;
12989 extract_insn_cached (insn);
12990 for (i = recog_data.n_operands - 1; i >= 0; --i)
12991 if (CONSTANT_P (recog_data.operand[i]))
12993 gcc_assert (!len);
12994 if (shortform
12995 && GET_CODE (recog_data.operand[i]) == CONST_INT
12996 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12997 len = 1;
12998 else
13000 switch (get_attr_mode (insn))
13002 case MODE_QI:
13003 len+=1;
13004 break;
13005 case MODE_HI:
13006 len+=2;
13007 break;
13008 case MODE_SI:
13009 len+=4;
13010 break;
13011 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
13012 case MODE_DI:
13013 len+=4;
13014 break;
13015 default:
13016 fatal_insn ("unknown insn mode", insn);
13020 return len;
13022 /* Compute default value for "length_address" attribute. */
13024 ix86_attr_length_address_default (rtx insn)
13026 int i;
13028 if (get_attr_type (insn) == TYPE_LEA)
13030 rtx set = PATTERN (insn);
13032 if (GET_CODE (set) == PARALLEL)
13033 set = XVECEXP (set, 0, 0);
13035 gcc_assert (GET_CODE (set) == SET);
13037 return memory_address_length (SET_SRC (set));
13040 extract_insn_cached (insn);
13041 for (i = recog_data.n_operands - 1; i >= 0; --i)
13042 if (GET_CODE (recog_data.operand[i]) == MEM)
13044 return memory_address_length (XEXP (recog_data.operand[i], 0));
13045 break;
13047 return 0;
13050 /* Return the maximum number of instructions a cpu can issue. */
13052 static int
13053 ix86_issue_rate (void)
13055 switch (ix86_tune)
13057 case PROCESSOR_PENTIUM:
13058 case PROCESSOR_K6:
13059 return 2;
13061 case PROCESSOR_PENTIUMPRO:
13062 case PROCESSOR_PENTIUM4:
13063 case PROCESSOR_ATHLON:
13064 case PROCESSOR_K8:
13065 case PROCESSOR_NOCONA:
13066 return 3;
13068 default:
13069 return 1;
13073 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
13074 by DEP_INSN and nothing else set by DEP_INSN. */
13076 static int
13077 ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13079 rtx set, set2;
13081 /* Simplify the test for uninteresting insns. */
13082 if (insn_type != TYPE_SETCC
13083 && insn_type != TYPE_ICMOV
13084 && insn_type != TYPE_FCMOV
13085 && insn_type != TYPE_IBR)
13086 return 0;
13088 if ((set = single_set (dep_insn)) != 0)
13090 set = SET_DEST (set);
13091 set2 = NULL_RTX;
13093 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13094 && XVECLEN (PATTERN (dep_insn), 0) == 2
13095 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13096 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13098 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13099 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13101 else
13102 return 0;
13104 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13105 return 0;
13107 /* This test is true if the dependent insn reads the flags but
13108 not any other potentially set register. */
13109 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13110 return 0;
13112 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13113 return 0;
13115 return 1;
13118 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13119 address with operands set by DEP_INSN. */
13121 static int
13122 ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13124 rtx addr;
13126 if (insn_type == TYPE_LEA
13127 && TARGET_PENTIUM)
13129 addr = PATTERN (insn);
13131 if (GET_CODE (addr) == PARALLEL)
13132 addr = XVECEXP (addr, 0, 0);
13134 gcc_assert (GET_CODE (addr) == SET);
13136 addr = SET_SRC (addr);
13138 else
13140 int i;
13141 extract_insn_cached (insn);
13142 for (i = recog_data.n_operands - 1; i >= 0; --i)
13143 if (GET_CODE (recog_data.operand[i]) == MEM)
13145 addr = XEXP (recog_data.operand[i], 0);
13146 goto found;
13148 return 0;
13149 found:;
13152 return modified_in_p (addr, dep_insn);
13155 static int
13156 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13158 enum attr_type insn_type, dep_insn_type;
13159 enum attr_memory memory;
13160 rtx set, set2;
13161 int dep_insn_code_number;
13163 /* Anti and output dependencies have zero cost on all CPUs. */
13164 if (REG_NOTE_KIND (link) != 0)
13165 return 0;
13167 dep_insn_code_number = recog_memoized (dep_insn);
13169 /* If we can't recognize the insns, we can't really do anything. */
13170 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13171 return cost;
13173 insn_type = get_attr_type (insn);
13174 dep_insn_type = get_attr_type (dep_insn);
13176 switch (ix86_tune)
13178 case PROCESSOR_PENTIUM:
13179 /* Address Generation Interlock adds a cycle of latency. */
13180 if (ix86_agi_dependant (insn, dep_insn, insn_type))
13181 cost += 1;
13183 /* ??? Compares pair with jump/setcc. */
13184 if (ix86_flags_dependant (insn, dep_insn, insn_type))
13185 cost = 0;
13187 /* Floating point stores require value to be ready one cycle earlier. */
13188 if (insn_type == TYPE_FMOV
13189 && get_attr_memory (insn) == MEMORY_STORE
13190 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13191 cost += 1;
13192 break;
13194 case PROCESSOR_PENTIUMPRO:
13195 memory = get_attr_memory (insn);
13197 /* INT->FP conversion is expensive. */
13198 if (get_attr_fp_int_src (dep_insn))
13199 cost += 5;
13201 /* There is one cycle extra latency between an FP op and a store. */
13202 if (insn_type == TYPE_FMOV
13203 && (set = single_set (dep_insn)) != NULL_RTX
13204 && (set2 = single_set (insn)) != NULL_RTX
13205 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13206 && GET_CODE (SET_DEST (set2)) == MEM)
13207 cost += 1;
13209 /* Show the ability of the reorder buffer to hide the latency of a load
13210    by executing it in parallel with the previous instruction when the
13211    previous instruction is not needed to compute the address. */
13212 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13213 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13215 /* Claim moves to take one cycle, as the core can issue one load
13216    at a time and the next load can start a cycle later. */
13217 if (dep_insn_type == TYPE_IMOV
13218 || dep_insn_type == TYPE_FMOV)
13219 cost = 1;
13220 else if (cost > 1)
13221 cost--;
13223 break;
13225 case PROCESSOR_K6:
13226 memory = get_attr_memory (insn);
13228 /* The esp dependency is resolved before the instruction is really
13229 finished. */
13230 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13231 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13232 return 1;
13234 /* INT->FP conversion is expensive. */
13235 if (get_attr_fp_int_src (dep_insn))
13236 cost += 5;
13238 /* Show the ability of the reorder buffer to hide the latency of a load
13239    by executing it in parallel with the previous instruction when the
13240    previous instruction is not needed to compute the address. */
13241 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13242 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13244 /* Claim moves to take one cycle, as the core can issue one load
13245    at a time and the next load can start a cycle later. */
13246 if (dep_insn_type == TYPE_IMOV
13247 || dep_insn_type == TYPE_FMOV)
13248 cost = 1;
13249 else if (cost > 2)
13250 cost -= 2;
13251 else
13252 cost = 1;
13254 break;
13256 case PROCESSOR_ATHLON:
13257 case PROCESSOR_K8:
13258 memory = get_attr_memory (insn);
13260 /* Show the ability of the reorder buffer to hide the latency of a load
13261    by executing it in parallel with the previous instruction when the
13262    previous instruction is not needed to compute the address. */
13263 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13264 && !ix86_agi_dependant (insn, dep_insn, insn_type))
13266 enum attr_unit unit = get_attr_unit (insn);
13267 int loadcost = 3;
13269 /* Because of the difference between the length of integer and
13270 floating unit pipeline preparation stages, the memory operands
13271 for floating point are cheaper.
13273 ??? For Athlon the difference is most probably 2. */
13274 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13275 loadcost = 3;
13276 else
13277 loadcost = TARGET_ATHLON ? 2 : 0;
13279 if (cost >= loadcost)
13280 cost -= loadcost;
13281 else
13282 cost = 0;
13285 default:
13286 break;
13289 return cost;
13292 /* How many alternative schedules to try. This should be as wide as the
13293 scheduling freedom in the DFA, but no wider. Making this value too
13294 large results in extra work for the scheduler. */
13296 static int
13297 ia32_multipass_dfa_lookahead (void)
13299 if (ix86_tune == PROCESSOR_PENTIUM)
13300 return 2;
13302 if (ix86_tune == PROCESSOR_PENTIUMPRO
13303 || ix86_tune == PROCESSOR_K6)
13304 return 1;
13306 else
13307 return 0;
13311 /* Compute the alignment given to a constant that is being placed in memory.
13312 EXP is the constant and ALIGN is the alignment that the object would
13313 ordinarily have.
13314 The value of this function is used instead of that alignment to align
13315 the object. */
13318 ix86_constant_alignment (tree exp, int align)
13320 if (TREE_CODE (exp) == REAL_CST)
13322 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13323 return 64;
13324 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13325 return 128;
13327 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13328 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13329 return BITS_PER_WORD;
13331 return align;
13334 /* Compute the alignment for a static variable.
13335 TYPE is the data type, and ALIGN is the alignment that
13336 the object would ordinarily have. The value of this function is used
13337 instead of that alignment to align the object. */
13340 ix86_data_alignment (tree type, int align)
13342 if (AGGREGATE_TYPE_P (type)
13343 && TYPE_SIZE (type)
13344 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13345 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
13346 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
13347 return 256;
13349 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13350 to a 16-byte boundary. */
13351 if (TARGET_64BIT)
13353 if (AGGREGATE_TYPE_P (type)
13354 && TYPE_SIZE (type)
13355 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13356 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13357 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13358 return 128;
13361 if (TREE_CODE (type) == ARRAY_TYPE)
13363 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13364 return 64;
13365 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13366 return 128;
13368 else if (TREE_CODE (type) == COMPLEX_TYPE)
13371 if (TYPE_MODE (type) == DCmode && align < 64)
13372 return 64;
13373 if (TYPE_MODE (type) == XCmode && align < 128)
13374 return 128;
13376 else if ((TREE_CODE (type) == RECORD_TYPE
13377 || TREE_CODE (type) == UNION_TYPE
13378 || TREE_CODE (type) == QUAL_UNION_TYPE)
13379 && TYPE_FIELDS (type))
13381 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13382 return 64;
13383 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13384 return 128;
13386 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13387 || TREE_CODE (type) == INTEGER_TYPE)
13389 if (TYPE_MODE (type) == DFmode && align < 64)
13390 return 64;
13391 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13392 return 128;
13395 return align;
13398 /* Compute the alignment for a local variable.
13399 TYPE is the data type, and ALIGN is the alignment that
13400 the object would ordinarily have. The value of this macro is used
13401 instead of that alignment to align the object. */
13404 ix86_local_alignment (tree type, int align)
13406 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13407 to a 16-byte boundary. */
13408 if (TARGET_64BIT)
13410 if (AGGREGATE_TYPE_P (type)
13411 && TYPE_SIZE (type)
13412 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13413 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13414 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13415 return 128;
13417 if (TREE_CODE (type) == ARRAY_TYPE)
13419 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13420 return 64;
13421 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13422 return 128;
13424 else if (TREE_CODE (type) == COMPLEX_TYPE)
13426 if (TYPE_MODE (type) == DCmode && align < 64)
13427 return 64;
13428 if (TYPE_MODE (type) == XCmode && align < 128)
13429 return 128;
13431 else if ((TREE_CODE (type) == RECORD_TYPE
13432 || TREE_CODE (type) == UNION_TYPE
13433 || TREE_CODE (type) == QUAL_UNION_TYPE)
13434 && TYPE_FIELDS (type))
13436 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13437 return 64;
13438 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13439 return 128;
13441 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13442 || TREE_CODE (type) == INTEGER_TYPE)
13445 if (TYPE_MODE (type) == DFmode && align < 64)
13446 return 64;
13447 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13448 return 128;
13450 return align;
13453 /* Emit RTL insns to initialize the variable parts of a trampoline.
13454 FNADDR is an RTX for the address of the function's pure code.
13455 CXT is an RTX for the static chain value for the function. */
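/* As an illustrative decoding of the constants emitted below (my reading,
   not part of the original comment), the 32-bit trampoline is

     b9 <cxt32>                movl  $CXT, %ecx
     e9 <rel32>                jmp   FNADDR      (pc-relative)

   and the 64-bit one is

     41 bb <imm32> or 49 bb <imm64>   mov  $FNADDR, %r11
     49 ba <imm64>                    mov  $CXT, %r10
     49 ff e3                         jmp  *%r11  */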
13456 void
13457 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13459 if (!TARGET_64BIT)
13461 /* Compute offset from the end of the jmp to the target function. */
13462 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13463 plus_constant (tramp, 10),
13464 NULL_RTX, 1, OPTAB_DIRECT);
13465 emit_move_insn (gen_rtx_MEM (QImode, tramp),
13466 gen_int_mode (0xb9, QImode));
13467 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13468 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13469 gen_int_mode (0xe9, QImode));
13470 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13472 else
13474 int offset = 0;
13475 /* Try to load address using shorter movl instead of movabs.
13476 We may want to support movq for kernel mode, but the kernel does not use
13477 trampolines at the moment. */
13478 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13480 fnaddr = copy_to_mode_reg (DImode, fnaddr);
13481 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13482 gen_int_mode (0xbb41, HImode));
13483 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13484 gen_lowpart (SImode, fnaddr));
13485 offset += 6;
13487 else
13489 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13490 gen_int_mode (0xbb49, HImode));
13491 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13492 fnaddr);
13493 offset += 10;
13495 /* Load static chain using movabs to r10. */
13496 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13497 gen_int_mode (0xba49, HImode));
13498 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13499 cxt);
13500 offset += 10;
13501 /* Jump through %r11.  */
13502 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13503 gen_int_mode (0xff49, HImode));
13504 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13505 gen_int_mode (0xe3, QImode));
13506 offset += 3;
13507 gcc_assert (offset <= TRAMPOLINE_SIZE);
13510 #ifdef ENABLE_EXECUTE_STACK
13511 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13512 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13513 #endif
13516 /* Codes for all the SSE/MMX builtins. */
13517 enum ix86_builtins
13519 IX86_BUILTIN_ADDPS,
13520 IX86_BUILTIN_ADDSS,
13521 IX86_BUILTIN_DIVPS,
13522 IX86_BUILTIN_DIVSS,
13523 IX86_BUILTIN_MULPS,
13524 IX86_BUILTIN_MULSS,
13525 IX86_BUILTIN_SUBPS,
13526 IX86_BUILTIN_SUBSS,
13528 IX86_BUILTIN_CMPEQPS,
13529 IX86_BUILTIN_CMPLTPS,
13530 IX86_BUILTIN_CMPLEPS,
13531 IX86_BUILTIN_CMPGTPS,
13532 IX86_BUILTIN_CMPGEPS,
13533 IX86_BUILTIN_CMPNEQPS,
13534 IX86_BUILTIN_CMPNLTPS,
13535 IX86_BUILTIN_CMPNLEPS,
13536 IX86_BUILTIN_CMPNGTPS,
13537 IX86_BUILTIN_CMPNGEPS,
13538 IX86_BUILTIN_CMPORDPS,
13539 IX86_BUILTIN_CMPUNORDPS,
13540 IX86_BUILTIN_CMPNEPS,
13541 IX86_BUILTIN_CMPEQSS,
13542 IX86_BUILTIN_CMPLTSS,
13543 IX86_BUILTIN_CMPLESS,
13544 IX86_BUILTIN_CMPNEQSS,
13545 IX86_BUILTIN_CMPNLTSS,
13546 IX86_BUILTIN_CMPNLESS,
13547 IX86_BUILTIN_CMPNGTSS,
13548 IX86_BUILTIN_CMPNGESS,
13549 IX86_BUILTIN_CMPORDSS,
13550 IX86_BUILTIN_CMPUNORDSS,
13551 IX86_BUILTIN_CMPNESS,
13553 IX86_BUILTIN_COMIEQSS,
13554 IX86_BUILTIN_COMILTSS,
13555 IX86_BUILTIN_COMILESS,
13556 IX86_BUILTIN_COMIGTSS,
13557 IX86_BUILTIN_COMIGESS,
13558 IX86_BUILTIN_COMINEQSS,
13559 IX86_BUILTIN_UCOMIEQSS,
13560 IX86_BUILTIN_UCOMILTSS,
13561 IX86_BUILTIN_UCOMILESS,
13562 IX86_BUILTIN_UCOMIGTSS,
13563 IX86_BUILTIN_UCOMIGESS,
13564 IX86_BUILTIN_UCOMINEQSS,
13566 IX86_BUILTIN_CVTPI2PS,
13567 IX86_BUILTIN_CVTPS2PI,
13568 IX86_BUILTIN_CVTSI2SS,
13569 IX86_BUILTIN_CVTSI642SS,
13570 IX86_BUILTIN_CVTSS2SI,
13571 IX86_BUILTIN_CVTSS2SI64,
13572 IX86_BUILTIN_CVTTPS2PI,
13573 IX86_BUILTIN_CVTTSS2SI,
13574 IX86_BUILTIN_CVTTSS2SI64,
13576 IX86_BUILTIN_MAXPS,
13577 IX86_BUILTIN_MAXSS,
13578 IX86_BUILTIN_MINPS,
13579 IX86_BUILTIN_MINSS,
13581 IX86_BUILTIN_LOADUPS,
13582 IX86_BUILTIN_STOREUPS,
13583 IX86_BUILTIN_MOVSS,
13585 IX86_BUILTIN_MOVHLPS,
13586 IX86_BUILTIN_MOVLHPS,
13587 IX86_BUILTIN_LOADHPS,
13588 IX86_BUILTIN_LOADLPS,
13589 IX86_BUILTIN_STOREHPS,
13590 IX86_BUILTIN_STORELPS,
13592 IX86_BUILTIN_MASKMOVQ,
13593 IX86_BUILTIN_MOVMSKPS,
13594 IX86_BUILTIN_PMOVMSKB,
13596 IX86_BUILTIN_MOVNTPS,
13597 IX86_BUILTIN_MOVNTQ,
13599 IX86_BUILTIN_LOADDQU,
13600 IX86_BUILTIN_STOREDQU,
13602 IX86_BUILTIN_PACKSSWB,
13603 IX86_BUILTIN_PACKSSDW,
13604 IX86_BUILTIN_PACKUSWB,
13606 IX86_BUILTIN_PADDB,
13607 IX86_BUILTIN_PADDW,
13608 IX86_BUILTIN_PADDD,
13609 IX86_BUILTIN_PADDQ,
13610 IX86_BUILTIN_PADDSB,
13611 IX86_BUILTIN_PADDSW,
13612 IX86_BUILTIN_PADDUSB,
13613 IX86_BUILTIN_PADDUSW,
13614 IX86_BUILTIN_PSUBB,
13615 IX86_BUILTIN_PSUBW,
13616 IX86_BUILTIN_PSUBD,
13617 IX86_BUILTIN_PSUBQ,
13618 IX86_BUILTIN_PSUBSB,
13619 IX86_BUILTIN_PSUBSW,
13620 IX86_BUILTIN_PSUBUSB,
13621 IX86_BUILTIN_PSUBUSW,
13623 IX86_BUILTIN_PAND,
13624 IX86_BUILTIN_PANDN,
13625 IX86_BUILTIN_POR,
13626 IX86_BUILTIN_PXOR,
13628 IX86_BUILTIN_PAVGB,
13629 IX86_BUILTIN_PAVGW,
13631 IX86_BUILTIN_PCMPEQB,
13632 IX86_BUILTIN_PCMPEQW,
13633 IX86_BUILTIN_PCMPEQD,
13634 IX86_BUILTIN_PCMPGTB,
13635 IX86_BUILTIN_PCMPGTW,
13636 IX86_BUILTIN_PCMPGTD,
13638 IX86_BUILTIN_PMADDWD,
13640 IX86_BUILTIN_PMAXSW,
13641 IX86_BUILTIN_PMAXUB,
13642 IX86_BUILTIN_PMINSW,
13643 IX86_BUILTIN_PMINUB,
13645 IX86_BUILTIN_PMULHUW,
13646 IX86_BUILTIN_PMULHW,
13647 IX86_BUILTIN_PMULLW,
13649 IX86_BUILTIN_PSADBW,
13650 IX86_BUILTIN_PSHUFW,
13652 IX86_BUILTIN_PSLLW,
13653 IX86_BUILTIN_PSLLD,
13654 IX86_BUILTIN_PSLLQ,
13655 IX86_BUILTIN_PSRAW,
13656 IX86_BUILTIN_PSRAD,
13657 IX86_BUILTIN_PSRLW,
13658 IX86_BUILTIN_PSRLD,
13659 IX86_BUILTIN_PSRLQ,
13660 IX86_BUILTIN_PSLLWI,
13661 IX86_BUILTIN_PSLLDI,
13662 IX86_BUILTIN_PSLLQI,
13663 IX86_BUILTIN_PSRAWI,
13664 IX86_BUILTIN_PSRADI,
13665 IX86_BUILTIN_PSRLWI,
13666 IX86_BUILTIN_PSRLDI,
13667 IX86_BUILTIN_PSRLQI,
13669 IX86_BUILTIN_PUNPCKHBW,
13670 IX86_BUILTIN_PUNPCKHWD,
13671 IX86_BUILTIN_PUNPCKHDQ,
13672 IX86_BUILTIN_PUNPCKLBW,
13673 IX86_BUILTIN_PUNPCKLWD,
13674 IX86_BUILTIN_PUNPCKLDQ,
13676 IX86_BUILTIN_SHUFPS,
13678 IX86_BUILTIN_RCPPS,
13679 IX86_BUILTIN_RCPSS,
13680 IX86_BUILTIN_RSQRTPS,
13681 IX86_BUILTIN_RSQRTSS,
13682 IX86_BUILTIN_SQRTPS,
13683 IX86_BUILTIN_SQRTSS,
13685 IX86_BUILTIN_UNPCKHPS,
13686 IX86_BUILTIN_UNPCKLPS,
13688 IX86_BUILTIN_ANDPS,
13689 IX86_BUILTIN_ANDNPS,
13690 IX86_BUILTIN_ORPS,
13691 IX86_BUILTIN_XORPS,
13693 IX86_BUILTIN_EMMS,
13694 IX86_BUILTIN_LDMXCSR,
13695 IX86_BUILTIN_STMXCSR,
13696 IX86_BUILTIN_SFENCE,
13698 /* 3DNow! Original */
13699 IX86_BUILTIN_FEMMS,
13700 IX86_BUILTIN_PAVGUSB,
13701 IX86_BUILTIN_PF2ID,
13702 IX86_BUILTIN_PFACC,
13703 IX86_BUILTIN_PFADD,
13704 IX86_BUILTIN_PFCMPEQ,
13705 IX86_BUILTIN_PFCMPGE,
13706 IX86_BUILTIN_PFCMPGT,
13707 IX86_BUILTIN_PFMAX,
13708 IX86_BUILTIN_PFMIN,
13709 IX86_BUILTIN_PFMUL,
13710 IX86_BUILTIN_PFRCP,
13711 IX86_BUILTIN_PFRCPIT1,
13712 IX86_BUILTIN_PFRCPIT2,
13713 IX86_BUILTIN_PFRSQIT1,
13714 IX86_BUILTIN_PFRSQRT,
13715 IX86_BUILTIN_PFSUB,
13716 IX86_BUILTIN_PFSUBR,
13717 IX86_BUILTIN_PI2FD,
13718 IX86_BUILTIN_PMULHRW,
13720 /* 3DNow! Athlon Extensions */
13721 IX86_BUILTIN_PF2IW,
13722 IX86_BUILTIN_PFNACC,
13723 IX86_BUILTIN_PFPNACC,
13724 IX86_BUILTIN_PI2FW,
13725 IX86_BUILTIN_PSWAPDSI,
13726 IX86_BUILTIN_PSWAPDSF,
13728 /* SSE2 */
13729 IX86_BUILTIN_ADDPD,
13730 IX86_BUILTIN_ADDSD,
13731 IX86_BUILTIN_DIVPD,
13732 IX86_BUILTIN_DIVSD,
13733 IX86_BUILTIN_MULPD,
13734 IX86_BUILTIN_MULSD,
13735 IX86_BUILTIN_SUBPD,
13736 IX86_BUILTIN_SUBSD,
13738 IX86_BUILTIN_CMPEQPD,
13739 IX86_BUILTIN_CMPLTPD,
13740 IX86_BUILTIN_CMPLEPD,
13741 IX86_BUILTIN_CMPGTPD,
13742 IX86_BUILTIN_CMPGEPD,
13743 IX86_BUILTIN_CMPNEQPD,
13744 IX86_BUILTIN_CMPNLTPD,
13745 IX86_BUILTIN_CMPNLEPD,
13746 IX86_BUILTIN_CMPNGTPD,
13747 IX86_BUILTIN_CMPNGEPD,
13748 IX86_BUILTIN_CMPORDPD,
13749 IX86_BUILTIN_CMPUNORDPD,
13750 IX86_BUILTIN_CMPNEPD,
13751 IX86_BUILTIN_CMPEQSD,
13752 IX86_BUILTIN_CMPLTSD,
13753 IX86_BUILTIN_CMPLESD,
13754 IX86_BUILTIN_CMPNEQSD,
13755 IX86_BUILTIN_CMPNLTSD,
13756 IX86_BUILTIN_CMPNLESD,
13757 IX86_BUILTIN_CMPORDSD,
13758 IX86_BUILTIN_CMPUNORDSD,
13759 IX86_BUILTIN_CMPNESD,
13761 IX86_BUILTIN_COMIEQSD,
13762 IX86_BUILTIN_COMILTSD,
13763 IX86_BUILTIN_COMILESD,
13764 IX86_BUILTIN_COMIGTSD,
13765 IX86_BUILTIN_COMIGESD,
13766 IX86_BUILTIN_COMINEQSD,
13767 IX86_BUILTIN_UCOMIEQSD,
13768 IX86_BUILTIN_UCOMILTSD,
13769 IX86_BUILTIN_UCOMILESD,
13770 IX86_BUILTIN_UCOMIGTSD,
13771 IX86_BUILTIN_UCOMIGESD,
13772 IX86_BUILTIN_UCOMINEQSD,
13774 IX86_BUILTIN_MAXPD,
13775 IX86_BUILTIN_MAXSD,
13776 IX86_BUILTIN_MINPD,
13777 IX86_BUILTIN_MINSD,
13779 IX86_BUILTIN_ANDPD,
13780 IX86_BUILTIN_ANDNPD,
13781 IX86_BUILTIN_ORPD,
13782 IX86_BUILTIN_XORPD,
13784 IX86_BUILTIN_SQRTPD,
13785 IX86_BUILTIN_SQRTSD,
13787 IX86_BUILTIN_UNPCKHPD,
13788 IX86_BUILTIN_UNPCKLPD,
13790 IX86_BUILTIN_SHUFPD,
13792 IX86_BUILTIN_LOADUPD,
13793 IX86_BUILTIN_STOREUPD,
13794 IX86_BUILTIN_MOVSD,
13796 IX86_BUILTIN_LOADHPD,
13797 IX86_BUILTIN_LOADLPD,
13799 IX86_BUILTIN_CVTDQ2PD,
13800 IX86_BUILTIN_CVTDQ2PS,
13802 IX86_BUILTIN_CVTPD2DQ,
13803 IX86_BUILTIN_CVTPD2PI,
13804 IX86_BUILTIN_CVTPD2PS,
13805 IX86_BUILTIN_CVTTPD2DQ,
13806 IX86_BUILTIN_CVTTPD2PI,
13808 IX86_BUILTIN_CVTPI2PD,
13809 IX86_BUILTIN_CVTSI2SD,
13810 IX86_BUILTIN_CVTSI642SD,
13812 IX86_BUILTIN_CVTSD2SI,
13813 IX86_BUILTIN_CVTSD2SI64,
13814 IX86_BUILTIN_CVTSD2SS,
13815 IX86_BUILTIN_CVTSS2SD,
13816 IX86_BUILTIN_CVTTSD2SI,
13817 IX86_BUILTIN_CVTTSD2SI64,
13819 IX86_BUILTIN_CVTPS2DQ,
13820 IX86_BUILTIN_CVTPS2PD,
13821 IX86_BUILTIN_CVTTPS2DQ,
13823 IX86_BUILTIN_MOVNTI,
13824 IX86_BUILTIN_MOVNTPD,
13825 IX86_BUILTIN_MOVNTDQ,
13827 /* SSE2 MMX */
13828 IX86_BUILTIN_MASKMOVDQU,
13829 IX86_BUILTIN_MOVMSKPD,
13830 IX86_BUILTIN_PMOVMSKB128,
13832 IX86_BUILTIN_PACKSSWB128,
13833 IX86_BUILTIN_PACKSSDW128,
13834 IX86_BUILTIN_PACKUSWB128,
13836 IX86_BUILTIN_PADDB128,
13837 IX86_BUILTIN_PADDW128,
13838 IX86_BUILTIN_PADDD128,
13839 IX86_BUILTIN_PADDQ128,
13840 IX86_BUILTIN_PADDSB128,
13841 IX86_BUILTIN_PADDSW128,
13842 IX86_BUILTIN_PADDUSB128,
13843 IX86_BUILTIN_PADDUSW128,
13844 IX86_BUILTIN_PSUBB128,
13845 IX86_BUILTIN_PSUBW128,
13846 IX86_BUILTIN_PSUBD128,
13847 IX86_BUILTIN_PSUBQ128,
13848 IX86_BUILTIN_PSUBSB128,
13849 IX86_BUILTIN_PSUBSW128,
13850 IX86_BUILTIN_PSUBUSB128,
13851 IX86_BUILTIN_PSUBUSW128,
13853 IX86_BUILTIN_PAND128,
13854 IX86_BUILTIN_PANDN128,
13855 IX86_BUILTIN_POR128,
13856 IX86_BUILTIN_PXOR128,
13858 IX86_BUILTIN_PAVGB128,
13859 IX86_BUILTIN_PAVGW128,
13861 IX86_BUILTIN_PCMPEQB128,
13862 IX86_BUILTIN_PCMPEQW128,
13863 IX86_BUILTIN_PCMPEQD128,
13864 IX86_BUILTIN_PCMPGTB128,
13865 IX86_BUILTIN_PCMPGTW128,
13866 IX86_BUILTIN_PCMPGTD128,
13868 IX86_BUILTIN_PMADDWD128,
13870 IX86_BUILTIN_PMAXSW128,
13871 IX86_BUILTIN_PMAXUB128,
13872 IX86_BUILTIN_PMINSW128,
13873 IX86_BUILTIN_PMINUB128,
13875 IX86_BUILTIN_PMULUDQ,
13876 IX86_BUILTIN_PMULUDQ128,
13877 IX86_BUILTIN_PMULHUW128,
13878 IX86_BUILTIN_PMULHW128,
13879 IX86_BUILTIN_PMULLW128,
13881 IX86_BUILTIN_PSADBW128,
13882 IX86_BUILTIN_PSHUFHW,
13883 IX86_BUILTIN_PSHUFLW,
13884 IX86_BUILTIN_PSHUFD,
13886 IX86_BUILTIN_PSLLW128,
13887 IX86_BUILTIN_PSLLD128,
13888 IX86_BUILTIN_PSLLQ128,
13889 IX86_BUILTIN_PSRAW128,
13890 IX86_BUILTIN_PSRAD128,
13891 IX86_BUILTIN_PSRLW128,
13892 IX86_BUILTIN_PSRLD128,
13893 IX86_BUILTIN_PSRLQ128,
13894 IX86_BUILTIN_PSLLDQI128,
13895 IX86_BUILTIN_PSLLWI128,
13896 IX86_BUILTIN_PSLLDI128,
13897 IX86_BUILTIN_PSLLQI128,
13898 IX86_BUILTIN_PSRAWI128,
13899 IX86_BUILTIN_PSRADI128,
13900 IX86_BUILTIN_PSRLDQI128,
13901 IX86_BUILTIN_PSRLWI128,
13902 IX86_BUILTIN_PSRLDI128,
13903 IX86_BUILTIN_PSRLQI128,
13905 IX86_BUILTIN_PUNPCKHBW128,
13906 IX86_BUILTIN_PUNPCKHWD128,
13907 IX86_BUILTIN_PUNPCKHDQ128,
13908 IX86_BUILTIN_PUNPCKHQDQ128,
13909 IX86_BUILTIN_PUNPCKLBW128,
13910 IX86_BUILTIN_PUNPCKLWD128,
13911 IX86_BUILTIN_PUNPCKLDQ128,
13912 IX86_BUILTIN_PUNPCKLQDQ128,
13914 IX86_BUILTIN_CLFLUSH,
13915 IX86_BUILTIN_MFENCE,
13916 IX86_BUILTIN_LFENCE,
13918 /* Prescott New Instructions. */
13919 IX86_BUILTIN_ADDSUBPS,
13920 IX86_BUILTIN_HADDPS,
13921 IX86_BUILTIN_HSUBPS,
13922 IX86_BUILTIN_MOVSHDUP,
13923 IX86_BUILTIN_MOVSLDUP,
13924 IX86_BUILTIN_ADDSUBPD,
13925 IX86_BUILTIN_HADDPD,
13926 IX86_BUILTIN_HSUBPD,
13927 IX86_BUILTIN_LDDQU,
13929 IX86_BUILTIN_MONITOR,
13930 IX86_BUILTIN_MWAIT,
13932 IX86_BUILTIN_VEC_INIT_V2SI,
13933 IX86_BUILTIN_VEC_INIT_V4HI,
13934 IX86_BUILTIN_VEC_INIT_V8QI,
13935 IX86_BUILTIN_VEC_EXT_V2DF,
13936 IX86_BUILTIN_VEC_EXT_V2DI,
13937 IX86_BUILTIN_VEC_EXT_V4SF,
13938 IX86_BUILTIN_VEC_EXT_V4SI,
13939 IX86_BUILTIN_VEC_EXT_V8HI,
13940 IX86_BUILTIN_VEC_EXT_V2SI,
13941 IX86_BUILTIN_VEC_EXT_V4HI,
13942 IX86_BUILTIN_VEC_SET_V8HI,
13943 IX86_BUILTIN_VEC_SET_V4HI,
13945 IX86_BUILTIN_MAX
13946 };
13948 #define def_builtin(MASK, NAME, TYPE, CODE) \
13949 do { \
13950 if ((MASK) & target_flags \
13951 && (!((MASK) & MASK_64BIT) || TARGET_64BIT)) \
13952 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
13953 NULL, NULL_TREE); \
13954 } while (0)
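/* def_builtin registers NAME only when at least one bit of MASK is enabled
   in target_flags and, if MASK includes MASK_64BIT, only on 64-bit targets.
   A typical call, taken from further down in this file:

     def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void,
                  IX86_BUILTIN_EMMS);  */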
13956 /* Bits for builtin_description.flag. */
13958 /* Set when we don't support the comparison natively, and should
13959 swap the comparison operands in order to support it. */
13960 #define BUILTIN_DESC_SWAP_OPERANDS 1
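/* For example, the packed-float compares have no native "greater than"
   form, so __builtin_ia32_cmpgtps is listed in bdesc_2arg below as LT with
   BUILTIN_DESC_SWAP_OPERANDS set: cmpgt (a, b) is expanded as cmplt (b, a).  */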
13962 struct builtin_description
13963 {
13964 const unsigned int mask;
13965 const enum insn_code icode;
13966 const char *const name;
13967 const enum ix86_builtins code;
13968 const enum rtx_code comparison;
13969 const unsigned int flag;
13970 };
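/* A typical entry, taken from bdesc_2arg below:

     { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
       IX86_BUILTIN_ADDPS, 0, 0 },

   i.e. when SSE is enabled, register __builtin_ia32_addps and expand it
   through the addv4sf3 insn pattern; the comparison and flag fields are
   only meaningful for the compare entries.  */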
13972 static const struct builtin_description bdesc_comi[] =
13973 {
13974 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13975 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13976 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13977 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13978 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13979 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13980 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13981 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13982 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13983 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13984 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13985 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13986 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13987 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13988 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13989 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13990 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13991 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13992 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13993 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13994 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13995 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13996 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13997 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
14000 static const struct builtin_description bdesc_2arg[] =
14001 {
14002 /* SSE */
14003 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14004 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14005 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14006 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14007 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14008 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14009 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14010 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14012 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14013 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14014 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14015 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14016 BUILTIN_DESC_SWAP_OPERANDS },
14017 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14018 BUILTIN_DESC_SWAP_OPERANDS },
14019 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14020 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14021 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14022 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14023 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14024 BUILTIN_DESC_SWAP_OPERANDS },
14025 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14026 BUILTIN_DESC_SWAP_OPERANDS },
14027 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14028 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14029 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14030 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14031 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14032 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14033 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14034 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14035 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14036 BUILTIN_DESC_SWAP_OPERANDS },
14037 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14038 BUILTIN_DESC_SWAP_OPERANDS },
14039 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14041 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14042 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14043 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14044 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14046 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14047 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14048 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14049 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14051 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14052 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14053 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14054 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14055 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14057 /* MMX */
14058 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14059 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14060 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14061 { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14062 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14063 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14064 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14065 { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14067 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14068 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14069 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14070 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14071 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14072 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14073 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14074 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14076 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14077 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14078 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14080 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14081 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14082 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14083 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14085 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14086 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14088 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14089 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14090 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14091 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14092 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14093 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14095 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14096 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14097 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14098 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14100 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14101 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14102 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14103 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14104 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14105 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14107 /* Special. */
14108 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14109 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14110 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14112 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14113 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14114 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14116 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14117 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14118 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14119 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14120 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14121 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14123 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14124 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14125 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14126 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14127 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14128 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14130 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14131 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14132 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14133 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14135 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14136 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14138 /* SSE2 */
14139 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14140 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14141 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14142 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14143 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14144 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14145 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14146 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14148 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14149 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14150 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14151 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14152 BUILTIN_DESC_SWAP_OPERANDS },
14153 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14154 BUILTIN_DESC_SWAP_OPERANDS },
14155 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14156 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14157 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14158 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14159 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14160 BUILTIN_DESC_SWAP_OPERANDS },
14161 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14162 BUILTIN_DESC_SWAP_OPERANDS },
14163 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14164 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14165 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14166 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14167 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14168 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14169 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14170 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14171 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14173 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14174 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14175 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14176 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14178 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14179 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14180 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14181 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14183 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14184 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14185 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14187 /* SSE2 MMX */
14188 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14189 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14190 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14191 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14192 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14193 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14194 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14195 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14197 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14198 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14199 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14200 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14201 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14202 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14203 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14204 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14206 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14207 { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14209 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14210 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14211 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14212 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14214 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14215 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14217 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14218 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14219 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14220 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14221 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14222 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14224 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14225 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14226 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14227 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14229 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14230 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14231 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14232 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14233 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14234 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14235 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14236 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14238 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14239 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14240 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14242 { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14243 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14245 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14246 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14248 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14249 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14250 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14252 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14253 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14254 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14256 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14257 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14259 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14261 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14262 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14263 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14264 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14266 /* SSE3 MMX */
14267 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14268 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14269 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14270 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14271 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14272 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14273 };
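/* Entries above with a null name (the "Special" shift, pack and convert
   rows) are skipped by the generic registration loop in
   ix86_init_mmx_sse_builtins and instead get hand-written prototypes from
   the explicit def_builtin calls below, e.g. __builtin_ia32_psllw and
   __builtin_ia32_packsswb.  */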
14275 static const struct builtin_description bdesc_1arg[] =
14276 {
14277 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14278 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14280 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14281 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14282 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14284 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14285 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14286 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14287 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14288 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14289 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14291 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14292 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14294 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14296 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14297 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14299 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14300 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14301 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14302 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14303 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14305 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14307 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14308 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14309 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14310 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14312 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14313 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14314 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14316 /* SSE3 */
14317 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14318 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14319 };
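/* Every row in bdesc_1arg has a null name; the corresponding builtins
   (e.g. __builtin_ia32_sqrtps, __builtin_ia32_cvttss2si) are registered by
   hand below, and the table supplies the insn patterns used when they are
   expanded.  */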
14321 static void
14322 ix86_init_builtins (void)
14323 {
14324 if (TARGET_MMX)
14325 ix86_init_mmx_sse_builtins ();
14326 }
14328 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
14329 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
14330 builtins. */
14331 static void
14332 ix86_init_mmx_sse_builtins (void)
14333 {
14334 const struct builtin_description * d;
14335 size_t i;
14337 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14338 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14339 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14340 tree V2DI_type_node
14341 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14342 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14343 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14344 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14345 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14346 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14347 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14349 tree pchar_type_node = build_pointer_type (char_type_node);
14350 tree pcchar_type_node = build_pointer_type (
14351 build_type_variant (char_type_node, 1, 0));
14352 tree pfloat_type_node = build_pointer_type (float_type_node);
14353 tree pcfloat_type_node = build_pointer_type (
14354 build_type_variant (float_type_node, 1, 0));
14355 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14356 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14357 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14359 /* Comparisons. */
14360 tree int_ftype_v4sf_v4sf
14361 = build_function_type_list (integer_type_node,
14362 V4SF_type_node, V4SF_type_node, NULL_TREE);
14363 tree v4si_ftype_v4sf_v4sf
14364 = build_function_type_list (V4SI_type_node,
14365 V4SF_type_node, V4SF_type_node, NULL_TREE);
14366 /* MMX/SSE/integer conversions. */
14367 tree int_ftype_v4sf
14368 = build_function_type_list (integer_type_node,
14369 V4SF_type_node, NULL_TREE);
14370 tree int64_ftype_v4sf
14371 = build_function_type_list (long_long_integer_type_node,
14372 V4SF_type_node, NULL_TREE);
14373 tree int_ftype_v8qi
14374 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14375 tree v4sf_ftype_v4sf_int
14376 = build_function_type_list (V4SF_type_node,
14377 V4SF_type_node, integer_type_node, NULL_TREE);
14378 tree v4sf_ftype_v4sf_int64
14379 = build_function_type_list (V4SF_type_node,
14380 V4SF_type_node, long_long_integer_type_node,
14381 NULL_TREE);
14382 tree v4sf_ftype_v4sf_v2si
14383 = build_function_type_list (V4SF_type_node,
14384 V4SF_type_node, V2SI_type_node, NULL_TREE);
14386 /* Miscellaneous. */
14387 tree v8qi_ftype_v4hi_v4hi
14388 = build_function_type_list (V8QI_type_node,
14389 V4HI_type_node, V4HI_type_node, NULL_TREE);
14390 tree v4hi_ftype_v2si_v2si
14391 = build_function_type_list (V4HI_type_node,
14392 V2SI_type_node, V2SI_type_node, NULL_TREE);
14393 tree v4sf_ftype_v4sf_v4sf_int
14394 = build_function_type_list (V4SF_type_node,
14395 V4SF_type_node, V4SF_type_node,
14396 integer_type_node, NULL_TREE);
14397 tree v2si_ftype_v4hi_v4hi
14398 = build_function_type_list (V2SI_type_node,
14399 V4HI_type_node, V4HI_type_node, NULL_TREE);
14400 tree v4hi_ftype_v4hi_int
14401 = build_function_type_list (V4HI_type_node,
14402 V4HI_type_node, integer_type_node, NULL_TREE);
14403 tree v4hi_ftype_v4hi_di
14404 = build_function_type_list (V4HI_type_node,
14405 V4HI_type_node, long_long_unsigned_type_node,
14406 NULL_TREE);
14407 tree v2si_ftype_v2si_di
14408 = build_function_type_list (V2SI_type_node,
14409 V2SI_type_node, long_long_unsigned_type_node,
14410 NULL_TREE);
14411 tree void_ftype_void
14412 = build_function_type (void_type_node, void_list_node);
14413 tree void_ftype_unsigned
14414 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14415 tree void_ftype_unsigned_unsigned
14416 = build_function_type_list (void_type_node, unsigned_type_node,
14417 unsigned_type_node, NULL_TREE);
14418 tree void_ftype_pcvoid_unsigned_unsigned
14419 = build_function_type_list (void_type_node, const_ptr_type_node,
14420 unsigned_type_node, unsigned_type_node,
14421 NULL_TREE);
14422 tree unsigned_ftype_void
14423 = build_function_type (unsigned_type_node, void_list_node);
14424 tree v2si_ftype_v4sf
14425 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14426 /* Loads/stores. */
14427 tree void_ftype_v8qi_v8qi_pchar
14428 = build_function_type_list (void_type_node,
14429 V8QI_type_node, V8QI_type_node,
14430 pchar_type_node, NULL_TREE);
14431 tree v4sf_ftype_pcfloat
14432 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14433 /* @@@ the type is bogus */
14434 tree v4sf_ftype_v4sf_pv2si
14435 = build_function_type_list (V4SF_type_node,
14436 V4SF_type_node, pv2si_type_node, NULL_TREE);
14437 tree void_ftype_pv2si_v4sf
14438 = build_function_type_list (void_type_node,
14439 pv2si_type_node, V4SF_type_node, NULL_TREE);
14440 tree void_ftype_pfloat_v4sf
14441 = build_function_type_list (void_type_node,
14442 pfloat_type_node, V4SF_type_node, NULL_TREE);
14443 tree void_ftype_pdi_di
14444 = build_function_type_list (void_type_node,
14445 pdi_type_node, long_long_unsigned_type_node,
14446 NULL_TREE);
14447 tree void_ftype_pv2di_v2di
14448 = build_function_type_list (void_type_node,
14449 pv2di_type_node, V2DI_type_node, NULL_TREE);
14450 /* Normal vector unops. */
14451 tree v4sf_ftype_v4sf
14452 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14454 /* Normal vector binops. */
14455 tree v4sf_ftype_v4sf_v4sf
14456 = build_function_type_list (V4SF_type_node,
14457 V4SF_type_node, V4SF_type_node, NULL_TREE);
14458 tree v8qi_ftype_v8qi_v8qi
14459 = build_function_type_list (V8QI_type_node,
14460 V8QI_type_node, V8QI_type_node, NULL_TREE);
14461 tree v4hi_ftype_v4hi_v4hi
14462 = build_function_type_list (V4HI_type_node,
14463 V4HI_type_node, V4HI_type_node, NULL_TREE);
14464 tree v2si_ftype_v2si_v2si
14465 = build_function_type_list (V2SI_type_node,
14466 V2SI_type_node, V2SI_type_node, NULL_TREE);
14467 tree di_ftype_di_di
14468 = build_function_type_list (long_long_unsigned_type_node,
14469 long_long_unsigned_type_node,
14470 long_long_unsigned_type_node, NULL_TREE);
14472 tree v2si_ftype_v2sf
14473 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14474 tree v2sf_ftype_v2si
14475 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14476 tree v2si_ftype_v2si
14477 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14478 tree v2sf_ftype_v2sf
14479 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14480 tree v2sf_ftype_v2sf_v2sf
14481 = build_function_type_list (V2SF_type_node,
14482 V2SF_type_node, V2SF_type_node, NULL_TREE);
14483 tree v2si_ftype_v2sf_v2sf
14484 = build_function_type_list (V2SI_type_node,
14485 V2SF_type_node, V2SF_type_node, NULL_TREE);
14486 tree pint_type_node = build_pointer_type (integer_type_node);
14487 tree pdouble_type_node = build_pointer_type (double_type_node);
14488 tree pcdouble_type_node = build_pointer_type (
14489 build_type_variant (double_type_node, 1, 0));
14490 tree int_ftype_v2df_v2df
14491 = build_function_type_list (integer_type_node,
14492 V2DF_type_node, V2DF_type_node, NULL_TREE);
14494 tree ti_ftype_ti_ti
14495 = build_function_type_list (intTI_type_node,
14496 intTI_type_node, intTI_type_node, NULL_TREE);
14497 tree void_ftype_pcvoid
14498 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14499 tree v4sf_ftype_v4si
14500 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14501 tree v4si_ftype_v4sf
14502 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14503 tree v2df_ftype_v4si
14504 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14505 tree v4si_ftype_v2df
14506 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14507 tree v2si_ftype_v2df
14508 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14509 tree v4sf_ftype_v2df
14510 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14511 tree v2df_ftype_v2si
14512 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14513 tree v2df_ftype_v4sf
14514 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14515 tree int_ftype_v2df
14516 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14517 tree int64_ftype_v2df
14518 = build_function_type_list (long_long_integer_type_node,
14519 V2DF_type_node, NULL_TREE);
14520 tree v2df_ftype_v2df_int
14521 = build_function_type_list (V2DF_type_node,
14522 V2DF_type_node, integer_type_node, NULL_TREE);
14523 tree v2df_ftype_v2df_int64
14524 = build_function_type_list (V2DF_type_node,
14525 V2DF_type_node, long_long_integer_type_node,
14526 NULL_TREE);
14527 tree v4sf_ftype_v4sf_v2df
14528 = build_function_type_list (V4SF_type_node,
14529 V4SF_type_node, V2DF_type_node, NULL_TREE);
14530 tree v2df_ftype_v2df_v4sf
14531 = build_function_type_list (V2DF_type_node,
14532 V2DF_type_node, V4SF_type_node, NULL_TREE);
14533 tree v2df_ftype_v2df_v2df_int
14534 = build_function_type_list (V2DF_type_node,
14535 V2DF_type_node, V2DF_type_node,
14536 integer_type_node,
14537 NULL_TREE);
14538 tree v2df_ftype_v2df_pcdouble
14539 = build_function_type_list (V2DF_type_node,
14540 V2DF_type_node, pcdouble_type_node, NULL_TREE);
14541 tree void_ftype_pdouble_v2df
14542 = build_function_type_list (void_type_node,
14543 pdouble_type_node, V2DF_type_node, NULL_TREE);
14544 tree void_ftype_pint_int
14545 = build_function_type_list (void_type_node,
14546 pint_type_node, integer_type_node, NULL_TREE);
14547 tree void_ftype_v16qi_v16qi_pchar
14548 = build_function_type_list (void_type_node,
14549 V16QI_type_node, V16QI_type_node,
14550 pchar_type_node, NULL_TREE);
14551 tree v2df_ftype_pcdouble
14552 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14553 tree v2df_ftype_v2df_v2df
14554 = build_function_type_list (V2DF_type_node,
14555 V2DF_type_node, V2DF_type_node, NULL_TREE);
14556 tree v16qi_ftype_v16qi_v16qi
14557 = build_function_type_list (V16QI_type_node,
14558 V16QI_type_node, V16QI_type_node, NULL_TREE);
14559 tree v8hi_ftype_v8hi_v8hi
14560 = build_function_type_list (V8HI_type_node,
14561 V8HI_type_node, V8HI_type_node, NULL_TREE);
14562 tree v4si_ftype_v4si_v4si
14563 = build_function_type_list (V4SI_type_node,
14564 V4SI_type_node, V4SI_type_node, NULL_TREE);
14565 tree v2di_ftype_v2di_v2di
14566 = build_function_type_list (V2DI_type_node,
14567 V2DI_type_node, V2DI_type_node, NULL_TREE);
14568 tree v2di_ftype_v2df_v2df
14569 = build_function_type_list (V2DI_type_node,
14570 V2DF_type_node, V2DF_type_node, NULL_TREE);
14571 tree v2df_ftype_v2df
14572 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14573 tree v2di_ftype_v2di_int
14574 = build_function_type_list (V2DI_type_node,
14575 V2DI_type_node, integer_type_node, NULL_TREE);
14576 tree v4si_ftype_v4si_int
14577 = build_function_type_list (V4SI_type_node,
14578 V4SI_type_node, integer_type_node, NULL_TREE);
14579 tree v8hi_ftype_v8hi_int
14580 = build_function_type_list (V8HI_type_node,
14581 V8HI_type_node, integer_type_node, NULL_TREE);
14582 tree v4si_ftype_v8hi_v8hi
14583 = build_function_type_list (V4SI_type_node,
14584 V8HI_type_node, V8HI_type_node, NULL_TREE);
14585 tree di_ftype_v8qi_v8qi
14586 = build_function_type_list (long_long_unsigned_type_node,
14587 V8QI_type_node, V8QI_type_node, NULL_TREE);
14588 tree di_ftype_v2si_v2si
14589 = build_function_type_list (long_long_unsigned_type_node,
14590 V2SI_type_node, V2SI_type_node, NULL_TREE);
14591 tree v2di_ftype_v16qi_v16qi
14592 = build_function_type_list (V2DI_type_node,
14593 V16QI_type_node, V16QI_type_node, NULL_TREE);
14594 tree v2di_ftype_v4si_v4si
14595 = build_function_type_list (V2DI_type_node,
14596 V4SI_type_node, V4SI_type_node, NULL_TREE);
14597 tree int_ftype_v16qi
14598 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14599 tree v16qi_ftype_pcchar
14600 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14601 tree void_ftype_pchar_v16qi
14602 = build_function_type_list (void_type_node,
14603 pchar_type_node, V16QI_type_node, NULL_TREE);
14605 tree float80_type;
14606 tree float128_type;
14607 tree ftype;
14609 /* The __float80 type. */
14610 if (TYPE_MODE (long_double_type_node) == XFmode)
14611 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14612 "__float80");
14613 else
14614 {
14615 /* The __float80 type. */
14616 float80_type = make_node (REAL_TYPE);
14617 TYPE_PRECISION (float80_type) = 80;
14618 layout_type (float80_type);
14619 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14622 if (TARGET_64BIT)
14623 {
14624 float128_type = make_node (REAL_TYPE);
14625 TYPE_PRECISION (float128_type) = 128;
14626 layout_type (float128_type);
14627 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
14630 /* Add all builtins that are more or less simple operations on two
14631 operands. */
14632 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14633 {
14634 /* Use one of the operands; the target can have a different mode for
14635 mask-generating compares. */
14636 enum machine_mode mode;
14637 tree type;
14639 if (d->name == 0)
14640 continue;
14641 mode = insn_data[d->icode].operand[1].mode;
14643 switch (mode)
14644 {
14645 case V16QImode:
14646 type = v16qi_ftype_v16qi_v16qi;
14647 break;
14648 case V8HImode:
14649 type = v8hi_ftype_v8hi_v8hi;
14650 break;
14651 case V4SImode:
14652 type = v4si_ftype_v4si_v4si;
14653 break;
14654 case V2DImode:
14655 type = v2di_ftype_v2di_v2di;
14656 break;
14657 case V2DFmode:
14658 type = v2df_ftype_v2df_v2df;
14659 break;
14660 case TImode:
14661 type = ti_ftype_ti_ti;
14662 break;
14663 case V4SFmode:
14664 type = v4sf_ftype_v4sf_v4sf;
14665 break;
14666 case V8QImode:
14667 type = v8qi_ftype_v8qi_v8qi;
14668 break;
14669 case V4HImode:
14670 type = v4hi_ftype_v4hi_v4hi;
14671 break;
14672 case V2SImode:
14673 type = v2si_ftype_v2si_v2si;
14674 break;
14675 case DImode:
14676 type = di_ftype_di_di;
14677 break;
14679 default:
14680 gcc_unreachable ();
14681 }
14683 /* Override for comparisons. */
14684 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14685 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
14686 type = v4si_ftype_v4sf_v4sf;
14688 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
14689 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14690 type = v2di_ftype_v2df_v2df;
14692 def_builtin (d->mask, d->name, type, d->code);
14693 }
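/* After this loop every named bdesc_2arg entry is a callable builtin; for
   instance the __builtin_ia32_andps registered from the table is what the
   _mm_and_ps intrinsic in xmmintrin.h expands to, roughly:

     __m128 r = __builtin_ia32_andps (a, b);  */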
14695 /* Add the remaining MMX insns with somewhat more complicated types. */
14696 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
14697 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
14698 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
14699 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
14701 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
14702 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
14703 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
14705 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
14706 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
14708 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
14709 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
14711 /* comi/ucomi insns. */
14712 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14713 if (d->mask == MASK_SSE2)
14714 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
14715 else
14716 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
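/* Each comi/ucomi builtin therefore takes two vectors and returns an int,
   e.g. "int r = __builtin_ia32_comieq (a, b);"; the MASK_SSE2 rows of
   bdesc_comi get the int (v2df, v2df) variant of the prototype.  */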
14718 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
14719 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
14720 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14722 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
14723 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
14724 def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
14725 def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
14726 def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14727 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14728 def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14729 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14730 def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14731 def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14732 def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
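/* These mirror the scalar SSE conversion instructions; e.g.
   "int i = __builtin_ia32_cvttss2si (v);" truncates the low float of v and
   is what the _mm_cvttss_si32 intrinsic expands to.  */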
14734 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14736 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14737 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14739 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14740 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14741 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14742 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14744 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14745 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14746 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14747 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14749 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14751 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14753 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14754 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14755 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14756 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14757 def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14758 def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14760 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
14762 /* Original 3DNow! */
14763 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
14764 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
14765 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
14766 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
14767 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
14768 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
14769 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
14770 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
14771 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14772 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14773 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14774 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14775 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14776 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14777 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14778 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14779 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14780 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14781 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14782 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14784 /* 3DNow! extension as used in the Athlon CPU. */
14785 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14786 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14787 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14788 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14789 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14790 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14792 /* SSE2 */
14793 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14795 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14796 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14798 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14799 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14801 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14802 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14803 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14804 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14805 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14807 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14808 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14809 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14810 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14812 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14813 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14815 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14817 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14818 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14820 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14821 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14822 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14823 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14824 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14826 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14828 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14829 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14830 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14831 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14833 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14834 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14835 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14837 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14838 def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14839 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14840 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14842 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14843 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14844 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14846 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14847 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14849 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14850 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14852 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
14853 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
14854 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14856 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
14857 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
14858 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14860 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
14861 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
14863 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14864 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14865 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14866 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14868 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14869 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14870 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14871 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14873 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14874 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14876 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14878 /* Prescott New Instructions. */
14879 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14880 void_ftype_pcvoid_unsigned_unsigned,
14881 IX86_BUILTIN_MONITOR);
14882 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14883 void_ftype_unsigned_unsigned,
14884 IX86_BUILTIN_MWAIT);
14885 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14886 v4sf_ftype_v4sf,
14887 IX86_BUILTIN_MOVSHDUP);
14888 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14889 v4sf_ftype_v4sf,
14890 IX86_BUILTIN_MOVSLDUP);
14891 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14892 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
14894 /* Access to the vec_init patterns. */
14895 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14896 integer_type_node, NULL_TREE);
14897 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14898 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14900 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14901 short_integer_type_node,
14902 short_integer_type_node,
14903 short_integer_type_node, NULL_TREE);
14904 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14905 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14907 ftype = build_function_type_list (V8QI_type_node, char_type_node,
14908 char_type_node, char_type_node,
14909 char_type_node, char_type_node,
14910 char_type_node, char_type_node,
14911 char_type_node, NULL_TREE);
14912 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14913 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
14915 /* Access to the vec_extract patterns. */
14916 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14917 integer_type_node, NULL_TREE);
14918 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14919 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14921 ftype = build_function_type_list (long_long_integer_type_node,
14922 V2DI_type_node, integer_type_node,
14923 NULL_TREE);
14924 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14925 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14927 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14928 integer_type_node, NULL_TREE);
14929 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14930 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14932 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14933 integer_type_node, NULL_TREE);
14934 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14935 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14937 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14938 integer_type_node, NULL_TREE);
14939 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14940 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14942 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14943 integer_type_node, NULL_TREE);
14944 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14945 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14947 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14948 integer_type_node, NULL_TREE);
14949 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14950 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
14952 /* Access to the vec_set patterns. */
14953 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14954 intHI_type_node,
14955 integer_type_node, NULL_TREE);
14956 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14957 ftype, IX86_BUILTIN_VEC_SET_V8HI);
14959 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14960 intHI_type_node,
14961 integer_type_node, NULL_TREE);
14962 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14963 ftype, IX86_BUILTIN_VEC_SET_V4HI);
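/* Illustrative note, not from the original source: each def_builtin call
   above registers a function that user code can call directly once the
   matching -m option is enabled; the intrinsic headers are thin wrappers
   around them.  A hedged sketch, assuming the __v2si vector typedef that
   mmintrin.h provides:

     __v2si p = __builtin_ia32_vec_init_v2si (lo, hi);

   which fills the two SImode elements in argument order, matching the
   V2SI_type_node signature built just above.  */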
14966 /* Errors in the source file can cause expand_expr to return const0_rtx
14967 where we expect a vector. To avoid crashing, use one of the vector
14968 clear instructions. */
14969 static rtx
14970 safe_vector_operand (rtx x, enum machine_mode mode)
14972 if (x == const0_rtx)
14973 x = CONST0_RTX (mode);
14974 return x;
14977 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
14979 static rtx
14980 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14982 rtx pat, xops[3];
14983 tree arg0 = TREE_VALUE (arglist);
14984 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14985 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14986 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14987 enum machine_mode tmode = insn_data[icode].operand[0].mode;
14988 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14989 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14991 if (VECTOR_MODE_P (mode0))
14992 op0 = safe_vector_operand (op0, mode0);
14993 if (VECTOR_MODE_P (mode1))
14994 op1 = safe_vector_operand (op1, mode1);
14996 if (optimize || !target
14997 || GET_MODE (target) != tmode
14998 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14999 target = gen_reg_rtx (tmode);
15001 if (GET_MODE (op1) == SImode && mode1 == TImode)
15003 rtx x = gen_reg_rtx (V4SImode);
15004 emit_insn (gen_sse2_loadd (x, op1));
15005 op1 = gen_lowpart (TImode, x);
15008 /* The insn must want input operands in the same modes as the
15009 result. */
15010 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15011 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15013 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15014 op0 = copy_to_mode_reg (mode0, op0);
15015 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15016 op1 = copy_to_mode_reg (mode1, op1);
15018 /* ??? Using ix86_fixup_binary_operands is problematic when
15019 we've got mismatched modes. Fake it. */
15021 xops[0] = target;
15022 xops[1] = op0;
15023 xops[2] = op1;
15025 if (tmode == mode0 && tmode == mode1)
15027 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15028 op0 = xops[1];
15029 op1 = xops[2];
15031 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15033 op0 = force_reg (mode0, op0);
15034 op1 = force_reg (mode1, op1);
15035 target = gen_reg_rtx (tmode);
15038 pat = GEN_FCN (icode) (target, op0, op1);
15039 if (! pat)
15040 return 0;
15041 emit_insn (pat);
15042 return target;
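/* Illustrative note, not from the original source: a typical builtin routed
   through this expander is IX86_BUILTIN_PFADD below, which arrives here
   with icode == CODE_FOR_mmx_addv2sf3.  A hedged sketch of the user-level
   call, assuming the __v2sf typedef from mm3dnow.h:

     __v2sf r = __builtin_ia32_pfadd (a, b);

   Both operands are coerced into the modes the named insn pattern expects
   and a single insn writing TARGET is emitted.  */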
15045 /* Subroutine of ix86_expand_builtin to take care of stores. */
15047 static rtx
15048 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15050 rtx pat;
15051 tree arg0 = TREE_VALUE (arglist);
15052 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15053 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15054 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15055 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15056 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15058 if (VECTOR_MODE_P (mode1))
15059 op1 = safe_vector_operand (op1, mode1);
15061 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15062 op1 = copy_to_mode_reg (mode1, op1);
15064 pat = GEN_FCN (icode) (op0, op1);
15065 if (pat)
15066 emit_insn (pat);
15067 return 0;
15070 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
15072 static rtx
15073 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15074 rtx target, int do_load)
15076 rtx pat;
15077 tree arg0 = TREE_VALUE (arglist);
15078 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15079 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15080 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15082 if (optimize || !target
15083 || GET_MODE (target) != tmode
15084 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15085 target = gen_reg_rtx (tmode);
15086 if (do_load)
15087 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15088 else
15090 if (VECTOR_MODE_P (mode0))
15091 op0 = safe_vector_operand (op0, mode0);
15093 if ((optimize && !register_operand (op0, mode0))
15094 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15095 op0 = copy_to_mode_reg (mode0, op0);
15098 pat = GEN_FCN (icode) (target, op0);
15099 if (! pat)
15100 return 0;
15101 emit_insn (pat);
15102 return target;
15105 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15106 sqrtss, rsqrtss, rcpss. */
15108 static rtx
15109 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15111 rtx pat;
15112 tree arg0 = TREE_VALUE (arglist);
15113 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15114 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15115 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15117 if (optimize || !target
15118 || GET_MODE (target) != tmode
15119 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15120 target = gen_reg_rtx (tmode);
15122 if (VECTOR_MODE_P (mode0))
15123 op0 = safe_vector_operand (op0, mode0);
15125 if ((optimize && !register_operand (op0, mode0))
15126 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15127 op0 = copy_to_mode_reg (mode0, op0);
15129 op1 = op0;
15130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15131 op1 = copy_to_mode_reg (mode0, op1);
15133 pat = GEN_FCN (icode) (target, op0, op1);
15134 if (! pat)
15135 return 0;
15136 emit_insn (pat);
15137 return target;
15140 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
15142 static rtx
15143 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15144 rtx target)
15146 rtx pat;
15147 tree arg0 = TREE_VALUE (arglist);
15148 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15149 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15150 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15151 rtx op2;
15152 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15153 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15154 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15155 enum rtx_code comparison = d->comparison;
15157 if (VECTOR_MODE_P (mode0))
15158 op0 = safe_vector_operand (op0, mode0);
15159 if (VECTOR_MODE_P (mode1))
15160 op1 = safe_vector_operand (op1, mode1);
15162 /* Swap operands if we have a comparison that isn't available in
15163 hardware. */
15164 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15166 rtx tmp = gen_reg_rtx (mode1);
15167 emit_move_insn (tmp, op1);
15168 op1 = op0;
15169 op0 = tmp;
15172 if (optimize || !target
15173 || GET_MODE (target) != tmode
15174 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15175 target = gen_reg_rtx (tmode);
15177 if ((optimize && !register_operand (op0, mode0))
15178 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15179 op0 = copy_to_mode_reg (mode0, op0);
15180 if ((optimize && !register_operand (op1, mode1))
15181 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15182 op1 = copy_to_mode_reg (mode1, op1);
15184 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15185 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15186 if (! pat)
15187 return 0;
15188 emit_insn (pat);
15189 return target;
15192 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
15194 static rtx
15195 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15196 rtx target)
15198 rtx pat;
15199 tree arg0 = TREE_VALUE (arglist);
15200 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15201 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15202 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15203 rtx op2;
15204 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15205 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15206 enum rtx_code comparison = d->comparison;
15208 if (VECTOR_MODE_P (mode0))
15209 op0 = safe_vector_operand (op0, mode0);
15210 if (VECTOR_MODE_P (mode1))
15211 op1 = safe_vector_operand (op1, mode1);
15213 /* Swap operands if we have a comparison that isn't available in
15214 hardware. */
15215 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15217 rtx tmp = op1;
15218 op1 = op0;
15219 op0 = tmp;
15222 target = gen_reg_rtx (SImode);
15223 emit_move_insn (target, const0_rtx);
15224 target = gen_rtx_SUBREG (QImode, target, 0);
15226 if ((optimize && !register_operand (op0, mode0))
15227 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15228 op0 = copy_to_mode_reg (mode0, op0);
15229 if ((optimize && !register_operand (op1, mode1))
15230 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15231 op1 = copy_to_mode_reg (mode1, op1);
15233 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15234 pat = GEN_FCN (d->icode) (op0, op1);
15235 if (! pat)
15236 return 0;
15237 emit_insn (pat);
15238 emit_insn (gen_rtx_SET (VOIDmode,
15239 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15240 gen_rtx_fmt_ee (comparison, QImode,
15241 SET_DEST (pat),
15242 const0_rtx)));
15244 return SUBREG_REG (target);
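/* Illustrative note, not from the original source: a hedged sketch of the
   RTL shape this expander emits for a COMI/UCOMI builtin -- first the
   flag-setting compare from PAT, then the STRICT_LOW_PART set built above,
   roughly:

     (set (reg flags) (compare ...))
     (set (strict_low_part (subreg:QI (reg:SI tmp) 0))
          (eq:QI (reg flags) (const_int 0)))          ; EQ flavour

   with the zero-initialized SImode pseudo returned as the int result.  */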
15247 /* Return the integer constant in ARG. Constrain it to be in the range
15248 of the subparts of VEC_TYPE; issue an error if not. */
15250 static int
15251 get_element_number (tree vec_type, tree arg)
15253 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15255 if (!host_integerp (arg, 1)
15256 || (elt = tree_low_cst (arg, 1), elt > max))
15258 error ("selector must be an integer constant in the range 0..%wi", max);
15259 return 0;
15262 return elt;
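/* Illustrative note, not from the original source: for the V4SF extract
   builtin defined earlier, TYPE_VECTOR_SUBPARTS is 4, so the selector must
   be a compile-time constant in 0..3.  A hedged sketch:

     float a = __builtin_ia32_vec_ext_v4sf (v, 2);    <- accepted
     float b = __builtin_ia32_vec_ext_v4sf (v, 4);    <- rejected with the
                                                          "selector" error  */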
15265 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15266 ix86_expand_vector_init. We DO have language-level syntax for this, in
15267 the form of (type){ init-list }. Except that since we can't place emms
15268 instructions from inside the compiler, we can't allow the use of MMX
15269 registers unless the user explicitly asks for it. So we do *not* define
15270 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
15272 we have builtins invoked by mmintrin.h that give us license to emit
15272 these sorts of instructions. */
15274 static rtx
15275 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15277 enum machine_mode tmode = TYPE_MODE (type);
15278 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15279 int i, n_elt = GET_MODE_NUNITS (tmode);
15280 rtvec v = rtvec_alloc (n_elt);
15282 gcc_assert (VECTOR_MODE_P (tmode));
15284 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15286 rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15287 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15290 gcc_assert (arglist == NULL);
15292 if (!target || !register_operand (target, tmode))
15293 target = gen_reg_rtx (tmode);
15295 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15296 return target;
15299 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15300 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
15301 had a language-level syntax for referencing vector elements. */
15303 static rtx
15304 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15306 enum machine_mode tmode, mode0;
15307 tree arg0, arg1;
15308 int elt;
15309 rtx op0;
15311 arg0 = TREE_VALUE (arglist);
15312 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15314 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15315 elt = get_element_number (TREE_TYPE (arg0), arg1);
15317 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15318 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15319 gcc_assert (VECTOR_MODE_P (mode0));
15321 op0 = force_reg (mode0, op0);
15323 if (optimize || !target || !register_operand (target, tmode))
15324 target = gen_reg_rtx (tmode);
15326 ix86_expand_vector_extract (true, target, op0, elt);
15328 return target;
15331 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
15332 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
15333 a language-level syntax for referencing vector elements. */
15335 static rtx
15336 ix86_expand_vec_set_builtin (tree arglist)
15338 enum machine_mode tmode, mode1;
15339 tree arg0, arg1, arg2;
15340 int elt;
15341 rtx op0, op1, target;
15343 arg0 = TREE_VALUE (arglist);
15344 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15345 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15347 tmode = TYPE_MODE (TREE_TYPE (arg0));
15348 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15349 gcc_assert (VECTOR_MODE_P (tmode));
15351 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15352 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15353 elt = get_element_number (TREE_TYPE (arg0), arg2);
15355 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15356 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15358 op0 = force_reg (tmode, op0);
15359 op1 = force_reg (mode1, op1);
15361 /* OP0 is the source of these builtin functions and shouldn't be
15362 modified. Create a copy, use it and return it as target. */
15363 target = gen_reg_rtx (tmode);
15364 emit_move_insn (target, op0);
15365 ix86_expand_vector_set (true, target, op1, elt);
15367 return target;
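/* Illustrative note, not from the original source: a hedged sketch of a
   call that reaches this expander, assuming the __v8hi typedef from
   emmintrin.h:

     __v8hi w = __builtin_ia32_vec_set_v8hi (v, 42, 3);

   The result is a fresh copy of V with element 3 replaced by 42; V itself
   is left untouched, as the comment above requires.  */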
15370 /* Expand an expression EXP that calls a built-in function,
15371 with result going to TARGET if that's convenient
15372 (and in mode MODE if that's convenient).
15373 SUBTARGET may be used as the target for computing one of EXP's operands.
15374 IGNORE is nonzero if the value is to be ignored. */
15376 static rtx
15377 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15378 enum machine_mode mode ATTRIBUTE_UNUSED,
15379 int ignore ATTRIBUTE_UNUSED)
15381 const struct builtin_description *d;
15382 size_t i;
15383 enum insn_code icode;
15384 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15385 tree arglist = TREE_OPERAND (exp, 1);
15386 tree arg0, arg1, arg2;
15387 rtx op0, op1, op2, pat;
15388 enum machine_mode tmode, mode0, mode1, mode2;
15389 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15391 switch (fcode)
15393 case IX86_BUILTIN_EMMS:
15394 emit_insn (gen_mmx_emms ());
15395 return 0;
15397 case IX86_BUILTIN_SFENCE:
15398 emit_insn (gen_sse_sfence ());
15399 return 0;
15401 case IX86_BUILTIN_MASKMOVQ:
15402 case IX86_BUILTIN_MASKMOVDQU:
15403 icode = (fcode == IX86_BUILTIN_MASKMOVQ
15404 ? CODE_FOR_mmx_maskmovq
15405 : CODE_FOR_sse2_maskmovdqu);
15406 /* Note the arg order is different from the operand order. */
15407 arg1 = TREE_VALUE (arglist);
15408 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15409 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15410 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15411 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15412 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15413 mode0 = insn_data[icode].operand[0].mode;
15414 mode1 = insn_data[icode].operand[1].mode;
15415 mode2 = insn_data[icode].operand[2].mode;
15417 op0 = force_reg (Pmode, op0);
15418 op0 = gen_rtx_MEM (mode1, op0);
15420 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15421 op0 = copy_to_mode_reg (mode0, op0);
15422 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15423 op1 = copy_to_mode_reg (mode1, op1);
15424 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15425 op2 = copy_to_mode_reg (mode2, op2);
15426 pat = GEN_FCN (icode) (op0, op1, op2);
15427 if (! pat)
15428 return 0;
15429 emit_insn (pat);
15430 return 0;
15432 case IX86_BUILTIN_SQRTSS:
15433 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15434 case IX86_BUILTIN_RSQRTSS:
15435 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15436 case IX86_BUILTIN_RCPSS:
15437 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15439 case IX86_BUILTIN_LOADUPS:
15440 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15442 case IX86_BUILTIN_STOREUPS:
15443 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15445 case IX86_BUILTIN_LOADHPS:
15446 case IX86_BUILTIN_LOADLPS:
15447 case IX86_BUILTIN_LOADHPD:
15448 case IX86_BUILTIN_LOADLPD:
15449 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15450 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15451 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15452 : CODE_FOR_sse2_loadlpd);
15453 arg0 = TREE_VALUE (arglist);
15454 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15455 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15456 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15457 tmode = insn_data[icode].operand[0].mode;
15458 mode0 = insn_data[icode].operand[1].mode;
15459 mode1 = insn_data[icode].operand[2].mode;
15461 op0 = force_reg (mode0, op0);
15462 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15463 if (optimize || target == 0
15464 || GET_MODE (target) != tmode
15465 || !register_operand (target, tmode))
15466 target = gen_reg_rtx (tmode);
15467 pat = GEN_FCN (icode) (target, op0, op1);
15468 if (! pat)
15469 return 0;
15470 emit_insn (pat);
15471 return target;
15473 case IX86_BUILTIN_STOREHPS:
15474 case IX86_BUILTIN_STORELPS:
15475 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15476 : CODE_FOR_sse_storelps);
15477 arg0 = TREE_VALUE (arglist);
15478 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15479 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15480 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15481 mode0 = insn_data[icode].operand[0].mode;
15482 mode1 = insn_data[icode].operand[1].mode;
15484 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15485 op1 = force_reg (mode1, op1);
15487 pat = GEN_FCN (icode) (op0, op1);
15488 if (! pat)
15489 return 0;
15490 emit_insn (pat);
15491 return const0_rtx;
15493 case IX86_BUILTIN_MOVNTPS:
15494 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15495 case IX86_BUILTIN_MOVNTQ:
15496 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15498 case IX86_BUILTIN_LDMXCSR:
15499 op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15500 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
15501 emit_move_insn (target, op0);
15502 emit_insn (gen_sse_ldmxcsr (target));
15503 return 0;
15505 case IX86_BUILTIN_STMXCSR:
15506 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
15507 emit_insn (gen_sse_stmxcsr (target));
15508 return copy_to_mode_reg (SImode, target);
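/* Illustrative note, not from the original source: these two cases back the
   MXCSR accessors.  A hedged sketch of the usual xmmintrin.h-style usage:

     unsigned int csr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (csr | 0x8040);

   where 0x8040 is assumed here to be the FTZ and DAZ bits; both builtins
   go through the SImode stack slot allocated above.  */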
15510 case IX86_BUILTIN_SHUFPS:
15511 case IX86_BUILTIN_SHUFPD:
15512 icode = (fcode == IX86_BUILTIN_SHUFPS
15513 ? CODE_FOR_sse_shufps
15514 : CODE_FOR_sse2_shufpd);
15515 arg0 = TREE_VALUE (arglist);
15516 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15517 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15518 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15519 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15520 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15521 tmode = insn_data[icode].operand[0].mode;
15522 mode0 = insn_data[icode].operand[1].mode;
15523 mode1 = insn_data[icode].operand[2].mode;
15524 mode2 = insn_data[icode].operand[3].mode;
15526 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15527 op0 = copy_to_mode_reg (mode0, op0);
15528 if ((optimize && !register_operand (op1, mode1))
15529 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15530 op1 = copy_to_mode_reg (mode1, op1);
15531 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15533 /* @@@ better error message */
15534 error ("mask must be an immediate");
15535 return gen_reg_rtx (tmode);
15537 if (optimize || target == 0
15538 || GET_MODE (target) != tmode
15539 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15540 target = gen_reg_rtx (tmode);
15541 pat = GEN_FCN (icode) (target, op0, op1, op2);
15542 if (! pat)
15543 return 0;
15544 emit_insn (pat);
15545 return target;
15547 case IX86_BUILTIN_PSHUFW:
15548 case IX86_BUILTIN_PSHUFD:
15549 case IX86_BUILTIN_PSHUFHW:
15550 case IX86_BUILTIN_PSHUFLW:
15551 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15552 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15553 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15554 : CODE_FOR_mmx_pshufw);
15555 arg0 = TREE_VALUE (arglist);
15556 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15557 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15558 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15559 tmode = insn_data[icode].operand[0].mode;
15560 mode1 = insn_data[icode].operand[1].mode;
15561 mode2 = insn_data[icode].operand[2].mode;
15563 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15564 op0 = copy_to_mode_reg (mode1, op0);
15565 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15567 /* @@@ better error message */
15568 error ("mask must be an immediate");
15569 return const0_rtx;
15571 if (target == 0
15572 || GET_MODE (target) != tmode
15573 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15574 target = gen_reg_rtx (tmode);
15575 pat = GEN_FCN (icode) (target, op0, op1);
15576 if (! pat)
15577 return 0;
15578 emit_insn (pat);
15579 return target;
15581 case IX86_BUILTIN_PSLLWI128:
15582 icode = CODE_FOR_ashlv8hi3;
15583 goto do_pshifti;
15584 case IX86_BUILTIN_PSLLDI128:
15585 icode = CODE_FOR_ashlv4si3;
15586 goto do_pshifti;
15587 case IX86_BUILTIN_PSLLQI128:
15588 icode = CODE_FOR_ashlv2di3;
15589 goto do_pshifti;
15590 case IX86_BUILTIN_PSRAWI128:
15591 icode = CODE_FOR_ashrv8hi3;
15592 goto do_pshifti;
15593 case IX86_BUILTIN_PSRADI128:
15594 icode = CODE_FOR_ashrv4si3;
15595 goto do_pshifti;
15596 case IX86_BUILTIN_PSRLWI128:
15597 icode = CODE_FOR_lshrv8hi3;
15598 goto do_pshifti;
15599 case IX86_BUILTIN_PSRLDI128:
15600 icode = CODE_FOR_lshrv4si3;
15601 goto do_pshifti;
15602 case IX86_BUILTIN_PSRLQI128:
15603 icode = CODE_FOR_lshrv2di3;
15604 goto do_pshifti;
15605 do_pshifti:
15606 arg0 = TREE_VALUE (arglist);
15607 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15608 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15609 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15611 if (GET_CODE (op1) != CONST_INT)
15613 error ("shift must be an immediate");
15614 return const0_rtx;
15616 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
15617 op1 = GEN_INT (255);
15619 tmode = insn_data[icode].operand[0].mode;
15620 mode1 = insn_data[icode].operand[1].mode;
15621 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15622 op0 = copy_to_reg (op0);
15624 target = gen_reg_rtx (tmode);
15625 pat = GEN_FCN (icode) (target, op0, op1);
15626 if (!pat)
15627 return 0;
15628 emit_insn (pat);
15629 return target;
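/* Illustrative note, not from the original source: the *I128 shift builtins
   handled just above require a literal count (assuming the __v8hi typedef
   from emmintrin.h), e.g.

     __v8hi a = __builtin_ia32_psllwi128 (x, 3);    <- accepted
     __v8hi b = __builtin_ia32_psllwi128 (x, n);    <- "shift must be an
                                                        immediate" error

   Negative or over-255 constants are forced to 255; variable counts go
   through the register-operand forms handled by the do_pshift cases below.  */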
15631 case IX86_BUILTIN_PSLLW128:
15632 icode = CODE_FOR_ashlv8hi3;
15633 goto do_pshift;
15634 case IX86_BUILTIN_PSLLD128:
15635 icode = CODE_FOR_ashlv4si3;
15636 goto do_pshift;
15637 case IX86_BUILTIN_PSLLQ128:
15638 icode = CODE_FOR_ashlv2di3;
15639 goto do_pshift;
15640 case IX86_BUILTIN_PSRAW128:
15641 icode = CODE_FOR_ashrv8hi3;
15642 goto do_pshift;
15643 case IX86_BUILTIN_PSRAD128:
15644 icode = CODE_FOR_ashrv4si3;
15645 goto do_pshift;
15646 case IX86_BUILTIN_PSRLW128:
15647 icode = CODE_FOR_lshrv8hi3;
15648 goto do_pshift;
15649 case IX86_BUILTIN_PSRLD128:
15650 icode = CODE_FOR_lshrv4si3;
15651 goto do_pshift;
15652 case IX86_BUILTIN_PSRLQ128:
15653 icode = CODE_FOR_lshrv2di3;
15654 goto do_pshift;
15655 do_pshift:
15656 arg0 = TREE_VALUE (arglist);
15657 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15658 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15659 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15661 tmode = insn_data[icode].operand[0].mode;
15662 mode1 = insn_data[icode].operand[1].mode;
15664 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15665 op0 = copy_to_reg (op0);
15667 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
15668 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
15669 op1 = copy_to_reg (op1);
15671 target = gen_reg_rtx (tmode);
15672 pat = GEN_FCN (icode) (target, op0, op1);
15673 if (!pat)
15674 return 0;
15675 emit_insn (pat);
15676 return target;
15678 case IX86_BUILTIN_PSLLDQI128:
15679 case IX86_BUILTIN_PSRLDQI128:
15680 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
15681 : CODE_FOR_sse2_lshrti3);
15682 arg0 = TREE_VALUE (arglist);
15683 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15684 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15685 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15686 tmode = insn_data[icode].operand[0].mode;
15687 mode1 = insn_data[icode].operand[1].mode;
15688 mode2 = insn_data[icode].operand[2].mode;
15690 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15692 op0 = copy_to_reg (op0);
15693 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
15695 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15697 error ("shift must be an immediate");
15698 return const0_rtx;
15700 target = gen_reg_rtx (V2DImode);
15701 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
15702 op0, op1);
15703 if (! pat)
15704 return 0;
15705 emit_insn (pat);
15706 return target;
15708 case IX86_BUILTIN_FEMMS:
15709 emit_insn (gen_mmx_femms ());
15710 return NULL_RTX;
15712 case IX86_BUILTIN_PAVGUSB:
15713 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
15715 case IX86_BUILTIN_PF2ID:
15716 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
15718 case IX86_BUILTIN_PFACC:
15719 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
15721 case IX86_BUILTIN_PFADD:
15722 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
15724 case IX86_BUILTIN_PFCMPEQ:
15725 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
15727 case IX86_BUILTIN_PFCMPGE:
15728 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
15730 case IX86_BUILTIN_PFCMPGT:
15731 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
15733 case IX86_BUILTIN_PFMAX:
15734 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
15736 case IX86_BUILTIN_PFMIN:
15737 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
15739 case IX86_BUILTIN_PFMUL:
15740 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
15742 case IX86_BUILTIN_PFRCP:
15743 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
15745 case IX86_BUILTIN_PFRCPIT1:
15746 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
15748 case IX86_BUILTIN_PFRCPIT2:
15749 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
15751 case IX86_BUILTIN_PFRSQIT1:
15752 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
15754 case IX86_BUILTIN_PFRSQRT:
15755 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
15757 case IX86_BUILTIN_PFSUB:
15758 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
15760 case IX86_BUILTIN_PFSUBR:
15761 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
15763 case IX86_BUILTIN_PI2FD:
15764 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
15766 case IX86_BUILTIN_PMULHRW:
15767 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
15769 case IX86_BUILTIN_PF2IW:
15770 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
15772 case IX86_BUILTIN_PFNACC:
15773 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
15775 case IX86_BUILTIN_PFPNACC:
15776 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
15778 case IX86_BUILTIN_PI2FW:
15779 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
15781 case IX86_BUILTIN_PSWAPDSI:
15782 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
15784 case IX86_BUILTIN_PSWAPDSF:
15785 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
15787 case IX86_BUILTIN_SQRTSD:
15788 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
15789 case IX86_BUILTIN_LOADUPD:
15790 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
15791 case IX86_BUILTIN_STOREUPD:
15792 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
15794 case IX86_BUILTIN_MFENCE:
15795 emit_insn (gen_sse2_mfence ());
15796 return 0;
15797 case IX86_BUILTIN_LFENCE:
15798 emit_insn (gen_sse2_lfence ());
15799 return 0;
15801 case IX86_BUILTIN_CLFLUSH:
15802 arg0 = TREE_VALUE (arglist);
15803 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15804 icode = CODE_FOR_sse2_clflush;
15805 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15806 op0 = copy_to_mode_reg (Pmode, op0);
15808 emit_insn (gen_sse2_clflush (op0));
15809 return 0;
15811 case IX86_BUILTIN_MOVNTPD:
15812 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
15813 case IX86_BUILTIN_MOVNTDQ:
15814 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
15815 case IX86_BUILTIN_MOVNTI:
15816 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
15818 case IX86_BUILTIN_LOADDQU:
15819 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
15820 case IX86_BUILTIN_STOREDQU:
15821 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
15823 case IX86_BUILTIN_MONITOR:
15824 arg0 = TREE_VALUE (arglist);
15825 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15826 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15827 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15828 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15829 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15830 if (!REG_P (op0))
15831 op0 = copy_to_mode_reg (SImode, op0);
15832 if (!REG_P (op1))
15833 op1 = copy_to_mode_reg (SImode, op1);
15834 if (!REG_P (op2))
15835 op2 = copy_to_mode_reg (SImode, op2);
15836 emit_insn (gen_sse3_monitor (op0, op1, op2));
15837 return 0;
15839 case IX86_BUILTIN_MWAIT:
15840 arg0 = TREE_VALUE (arglist);
15841 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15842 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15843 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15844 if (!REG_P (op0))
15845 op0 = copy_to_mode_reg (SImode, op0);
15846 if (!REG_P (op1))
15847 op1 = copy_to_mode_reg (SImode, op1);
15848 emit_insn (gen_sse3_mwait (op0, op1));
15849 return 0;
15851 case IX86_BUILTIN_LDDQU:
15852 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15853 target, 1);
15855 case IX86_BUILTIN_VEC_INIT_V2SI:
15856 case IX86_BUILTIN_VEC_INIT_V4HI:
15857 case IX86_BUILTIN_VEC_INIT_V8QI:
15858 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15860 case IX86_BUILTIN_VEC_EXT_V2DF:
15861 case IX86_BUILTIN_VEC_EXT_V2DI:
15862 case IX86_BUILTIN_VEC_EXT_V4SF:
15863 case IX86_BUILTIN_VEC_EXT_V4SI:
15864 case IX86_BUILTIN_VEC_EXT_V8HI:
15865 case IX86_BUILTIN_VEC_EXT_V2SI:
15866 case IX86_BUILTIN_VEC_EXT_V4HI:
15867 return ix86_expand_vec_ext_builtin (arglist, target);
15869 case IX86_BUILTIN_VEC_SET_V8HI:
15870 case IX86_BUILTIN_VEC_SET_V4HI:
15871 return ix86_expand_vec_set_builtin (arglist);
15873 default:
15874 break;
15877 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15878 if (d->code == fcode)
15880 /* Compares are treated specially. */
15881 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15882 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15883 || d->icode == CODE_FOR_sse2_maskcmpv2df3
15884 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15885 return ix86_expand_sse_compare (d, arglist, target);
15887 return ix86_expand_binop_builtin (d->icode, arglist, target);
15890 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15891 if (d->code == fcode)
15892 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15894 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15895 if (d->code == fcode)
15896 return ix86_expand_sse_comi (d, arglist, target);
15898 gcc_unreachable ();
15901 /* Store OPERAND to the memory after reload is completed. This means
15902 that we can't easily use assign_stack_local. */
15903 rtx
15904 ix86_force_to_memory (enum machine_mode mode, rtx operand)
15906 rtx result;
15908 gcc_assert (reload_completed);
15909 if (TARGET_RED_ZONE)
15911 result = gen_rtx_MEM (mode,
15912 gen_rtx_PLUS (Pmode,
15913 stack_pointer_rtx,
15914 GEN_INT (-RED_ZONE_SIZE)));
15915 emit_move_insn (result, operand);
15917 else if (!TARGET_RED_ZONE && TARGET_64BIT)
15919 switch (mode)
15921 case HImode:
15922 case SImode:
15923 operand = gen_lowpart (DImode, operand);
15924 /* FALLTHRU */
15925 case DImode:
15926 emit_insn (
15927 gen_rtx_SET (VOIDmode,
15928 gen_rtx_MEM (DImode,
15929 gen_rtx_PRE_DEC (DImode,
15930 stack_pointer_rtx)),
15931 operand));
15932 break;
15933 default:
15934 gcc_unreachable ();
15936 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15938 else
15940 switch (mode)
15942 case DImode:
15944 rtx operands[2];
15945 split_di (&operand, 1, operands, operands + 1);
15946 emit_insn (
15947 gen_rtx_SET (VOIDmode,
15948 gen_rtx_MEM (SImode,
15949 gen_rtx_PRE_DEC (Pmode,
15950 stack_pointer_rtx)),
15951 operands[1]));
15952 emit_insn (
15953 gen_rtx_SET (VOIDmode,
15954 gen_rtx_MEM (SImode,
15955 gen_rtx_PRE_DEC (Pmode,
15956 stack_pointer_rtx)),
15957 operands[0]));
15959 break;
15960 case HImode:
15961 /* Store HImodes as SImodes. */
15962 operand = gen_lowpart (SImode, operand);
15963 /* FALLTHRU */
15964 case SImode:
15965 emit_insn (
15966 gen_rtx_SET (VOIDmode,
15967 gen_rtx_MEM (GET_MODE (operand),
15968 gen_rtx_PRE_DEC (SImode,
15969 stack_pointer_rtx)),
15970 operand));
15971 break;
15972 default:
15973 gcc_unreachable ();
15975 result = gen_rtx_MEM (mode, stack_pointer_rtx);
15977 return result;
15980 /* Free operand from the memory. */
15981 void
15982 ix86_free_from_memory (enum machine_mode mode)
15984 if (!TARGET_RED_ZONE)
15986 int size;
15988 if (mode == DImode || TARGET_64BIT)
15989 size = 8;
15990 else
15991 size = 4;
15992 /* Use LEA to deallocate stack space. In peephole2 it will be converted
15993 to a pop or add instruction if registers are available. */
15994 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15995 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15996 GEN_INT (size))));
16000 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16001 QImode must go into class Q_REGS.
16002 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
16003 movdf to do mem-to-mem moves through integer regs. */
16004 enum reg_class
16005 ix86_preferred_reload_class (rtx x, enum reg_class class)
16007 /* We're only allowed to return a subclass of CLASS. Many of the
16008 following checks fail for NO_REGS, so eliminate that early. */
16009 if (class == NO_REGS)
16010 return NO_REGS;
16012 /* All classes can load zeros. */
16013 if (x == CONST0_RTX (GET_MODE (x)))
16014 return class;
16016 /* Floating-point constants need more complex checks. */
16017 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16019 /* General regs can load everything. */
16020 if (reg_class_subset_p (class, GENERAL_REGS))
16021 return class;
16023 /* Floats can load 0 and 1 plus some others. Note that we eliminated
16024 zero above. We only want to wind up preferring 80387 registers if
16025 we plan on doing computation with them. */
16026 if (TARGET_80387
16027 && (TARGET_MIX_SSE_I387
16028 || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
16029 && standard_80387_constant_p (x))
16031 /* Limit class to non-sse. */
16032 if (class == FLOAT_SSE_REGS)
16033 return FLOAT_REGS;
16034 if (class == FP_TOP_SSE_REGS)
16035 return FP_TOP_REG;
16036 if (class == FP_SECOND_SSE_REGS)
16037 return FP_SECOND_REG;
16038 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16039 return class;
16042 return NO_REGS;
16044 if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
16045 return NO_REGS;
16046 if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
16047 return NO_REGS;
16049 /* Generally when we see PLUS here, it's the function invariant
16050 (plus soft-fp const_int), which can only be computed into general
16051 regs. */
16052 if (GET_CODE (x) == PLUS)
16053 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16055 /* QImode constants are easy to load, but non-constant QImode data
16056 must go into Q_REGS. */
16057 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16059 if (reg_class_subset_p (class, Q_REGS))
16060 return class;
16061 if (reg_class_subset_p (Q_REGS, class))
16062 return Q_REGS;
16063 return NO_REGS;
16066 return class;
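/* Illustrative note, not from the original source: a worked example of the
   QImode rule above -- asked to reload a non-constant QImode value for
   GENERAL_REGS, we return Q_REGS, since Q_REGS is a subset of GENERAL_REGS
   and only the %al..%dl style registers can hold QImode data in 32-bit
   mode.  */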
16069 /* If we are copying between general and FP registers, we need a memory
16070 location. The same is true for SSE and MMX registers.
16072 The macro can't work reliably when one of the CLASSES is a class containing
16073 registers from multiple units (SSE, MMX, integer). We avoid this by never
16074 combining those units in a single alternative in the machine description.
16075 Ensure that this constraint holds to avoid unexpected surprises.
16077 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16078 enforce these sanity checks. */
16080 int
16081 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16082 enum machine_mode mode, int strict)
16084 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16085 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16086 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16087 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16088 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16089 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16091 gcc_assert (!strict);
16092 return true;
16095 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16096 return true;
16098 /* ??? This is a lie. We do have moves between mmx/general, and for
16099 mmx/sse2. But by saying we need secondary memory we discourage the
16100 register allocator from using the mmx registers unless needed. */
16101 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16102 return true;
16104 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16106 /* SSE1 doesn't have any direct moves from other classes. */
16107 if (!TARGET_SSE2)
16108 return true;
16110 /* If the target says that inter-unit moves are more expensive
16111 than moving through memory, then don't generate them. */
16112 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16113 return true;
16115 /* Between SSE and general, we have moves no larger than word size. */
16116 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16117 return true;
16119 /* ??? For the cost of one register reformat penalty, we could use
16120 the same instructions to move SFmode and DFmode data, but the
16121 relevant move patterns don't support those alternatives. */
16122 if (mode == SFmode || mode == DFmode)
16123 return true;
16126 return false;
16129 /* Return true if the registers in CLASS cannot represent the change from
16130 modes FROM to TO. */
16132 bool
16133 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16134 enum reg_class class)
16136 if (from == to)
16137 return false;
16139 /* x87 registers can't do subreg at all, as all values are reformatted
16140 to extended precision. */
16141 if (MAYBE_FLOAT_CLASS_P (class))
16142 return true;
16144 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16146 /* Vector registers do not support QI or HImode loads. If we don't
16147 disallow a change to these modes, reload will assume it's ok to
16148 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
16149 the vec_dupv4hi pattern. */
16150 if (GET_MODE_SIZE (from) < 4)
16151 return true;
16153 /* Vector registers do not support subreg with nonzero offsets, which
16154 are otherwise valid for integer registers. Since we can't see
16155 whether we have a nonzero offset from here, prohibit all
16156 nonparadoxical subregs changing size. */
16157 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16158 return true;
16161 return false;
16164 /* Return the cost of moving data from a register in class CLASS1 to
16165 one in class CLASS2.
16167 It is not required that the cost always equal 2 when FROM is the same as TO;
16168 on some machines it is expensive to move between registers if they are not
16169 general registers. */
16171 int
16172 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16173 enum reg_class class2)
16175 /* In case we require secondary memory, compute cost of the store followed
16176 by load. In order to avoid bad register allocation choices, we need
16177 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
16179 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16181 int cost = 1;
16183 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16184 MEMORY_MOVE_COST (mode, class1, 1));
16185 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16186 MEMORY_MOVE_COST (mode, class2, 1));
16188 /* In case of copying from a general purpose register we may emit multiple
16189 stores followed by a single load, causing a memory size mismatch stall.
16190 Count this as an arbitrarily high cost of 20. */
16191 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16192 cost += 20;
16194 /* In the case of FP/MMX moves, the registers actually overlap, and we
16195 have to switch modes in order to treat them differently. */
16196 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16197 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16198 cost += 20;
16200 return cost;
16203 /* Moves between SSE/MMX and integer unit are expensive. */
16204 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16205 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16206 return ix86_cost->mmxsse_to_integer;
16207 if (MAYBE_FLOAT_CLASS_P (class1))
16208 return ix86_cost->fp_move;
16209 if (MAYBE_SSE_CLASS_P (class1))
16210 return ix86_cost->sse_move;
16211 if (MAYBE_MMX_CLASS_P (class1))
16212 return ix86_cost->mmx_move;
16213 return 2;
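/* Illustrative note, not from the original source: a hedged worked example
   of the secondary-memory branch above -- copying SImode between
   GENERAL_REGS and SSE_REGS when a memory intermediate is required costs
   roughly

     1 + MAX (int store, int load) + MAX (sse store, sse load)

   in MEMORY_MOVE_COST units, plus the 20-point penalties when the register
   counts differ or FP/MMX overlap is involved.  */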
16216 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
16218 bool
16219 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16221 /* Flags and only flags can only hold CCmode values. */
16222 if (CC_REGNO_P (regno))
16223 return GET_MODE_CLASS (mode) == MODE_CC;
16224 if (GET_MODE_CLASS (mode) == MODE_CC
16225 || GET_MODE_CLASS (mode) == MODE_RANDOM
16226 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16227 return 0;
16228 if (FP_REGNO_P (regno))
16229 return VALID_FP_MODE_P (mode);
16230 if (SSE_REGNO_P (regno))
16232 /* We implement the move patterns for all vector modes into and
16233 out of SSE registers, even when no operation instructions
16234 are available. */
16235 return (VALID_SSE_REG_MODE (mode)
16236 || VALID_SSE2_REG_MODE (mode)
16237 || VALID_MMX_REG_MODE (mode)
16238 || VALID_MMX_REG_MODE_3DNOW (mode));
16240 if (MMX_REGNO_P (regno))
16242 /* We implement the move patterns for 3DNOW modes even in MMX mode,
16243 so if the register is available at all, then we can move data of
16244 the given mode into or out of it. */
16245 return (VALID_MMX_REG_MODE (mode)
16246 || VALID_MMX_REG_MODE_3DNOW (mode));
16249 if (mode == QImode)
16251 /* Take care for QImode values - they can be in non-QI regs,
16252 but then they do cause partial register stalls. */
16253 if (regno < 4 || TARGET_64BIT)
16254 return 1;
16255 if (!TARGET_PARTIAL_REG_STALL)
16256 return 1;
16257 return reload_in_progress || reload_completed;
16259 /* We handle both integer and floats in the general purpose registers. */
16260 else if (VALID_INT_MODE_P (mode))
16261 return 1;
16262 else if (VALID_FP_MODE_P (mode))
16263 return 1;
16264 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
16265 on to use that value in smaller contexts, this can easily force a
16266 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
16267 supporting DImode, allow it. */
16268 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16269 return 1;
16271 return 0;
16274 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
16275 tieable integer mode. */
16277 static bool
16278 ix86_tieable_integer_mode_p (enum machine_mode mode)
16280 switch (mode)
16282 case HImode:
16283 case SImode:
16284 return true;
16286 case QImode:
16287 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16289 case DImode:
16290 return TARGET_64BIT;
16292 default:
16293 return false;
16297 /* Return true if MODE1 is accessible in a register that can hold MODE2
16298 without copying. That is, all register classes that can hold MODE2
16299 can also hold MODE1. */
16301 bool
16302 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16304 if (mode1 == mode2)
16305 return true;
16307 if (ix86_tieable_integer_mode_p (mode1)
16308 && ix86_tieable_integer_mode_p (mode2))
16309 return true;
16311 /* MODE2 being XFmode implies fp stack or general regs, which means we
16312 can tie any smaller floating point modes to it. Note that we do not
16313 tie this with TFmode. */
16314 if (mode2 == XFmode)
16315 return mode1 == SFmode || mode1 == DFmode;
16317 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16318 that we can tie it with SFmode. */
16319 if (mode2 == DFmode)
16320 return mode1 == SFmode;
16322 /* If MODE2 is only appropriate for an SSE register, then tie with
16323 any other mode acceptable to SSE registers. */
16324 if (GET_MODE_SIZE (mode2) >= 8
16325 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16326 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16328 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16329 with any other mode acceptable to MMX registers. */
16330 if (GET_MODE_SIZE (mode2) == 8
16331 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16332 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16334 return false;
16337 /* Return the cost of moving data of mode M between a
16338 register and memory. A value of 2 is the default; this cost is
16339 relative to those in `REGISTER_MOVE_COST'.
16341 If moving between registers and memory is more expensive than
16342 between two registers, you should define this macro to express the
16343 relative cost.
16345 Also model the increased cost of moving QImode registers in
16346 non-Q_REGS classes.  */
16348 int
16349 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16351 if (FLOAT_CLASS_P (class))
16353 int index;
16354 switch (mode)
16356 case SFmode:
16357 index = 0;
16358 break;
16359 case DFmode:
16360 index = 1;
16361 break;
16362 case XFmode:
16363 index = 2;
16364 break;
16365 default:
16366 return 100;
16368 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16370 if (SSE_CLASS_P (class))
16372 int index;
16373 switch (GET_MODE_SIZE (mode))
16375 case 4:
16376 index = 0;
16377 break;
16378 case 8:
16379 index = 1;
16380 break;
16381 case 16:
16382 index = 2;
16383 break;
16384 default:
16385 return 100;
16387 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16389 if (MMX_CLASS_P (class))
16391 int index;
16392 switch (GET_MODE_SIZE (mode))
16394 case 4:
16395 index = 0;
16396 break;
16397 case 8:
16398 index = 1;
16399 break;
16400 default:
16401 return 100;
16403 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16405 switch (GET_MODE_SIZE (mode))
16407 case 1:
16408 if (in)
16409 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16410 : ix86_cost->movzbl_load);
16411 else
16412 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16413 : ix86_cost->int_store[0] + 4);
16414 break;
16415 case 2:
16416 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16417 default:
16418 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
16419 if (mode == TFmode)
16420 mode = XFmode;
16421 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16422 * (((int) GET_MODE_SIZE (mode)
16423 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
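/* Illustrative note, not from the original source: a worked example of the
   default branch above on a 32-bit target (UNITS_PER_WORD == 4) -- loading
   a 16-byte value into integer registers costs

     int_load[2] * ((16 + 4 - 1) / 4)  ==  int_load[2] * 4

   i.e. one word-sized move per 32-bit chunk.  */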
16427 /* Compute a (partial) cost for rtx X. Return true if the complete
16428 cost has been computed, and false if subexpressions should be
16429 scanned. In either case, *TOTAL contains the cost result. */
16431 static bool
16432 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16434 enum machine_mode mode = GET_MODE (x);
16436 switch (code)
16438 case CONST_INT:
16439 case CONST:
16440 case LABEL_REF:
16441 case SYMBOL_REF:
16442 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16443 *total = 3;
16444 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16445 *total = 2;
16446 else if (flag_pic && SYMBOLIC_CONST (x)
16447 && (!TARGET_64BIT
16448 || (GET_CODE (x) != LABEL_REF
16449 && (GET_CODE (x) != SYMBOL_REF
16450 || !SYMBOL_REF_LOCAL_P (x)))))
16451 *total = 1;
16452 else
16453 *total = 0;
16454 return true;
16456 case CONST_DOUBLE:
16457 if (mode == VOIDmode)
16458 *total = 0;
16459 else
16460 switch (standard_80387_constant_p (x))
16462 case 1: /* 0.0 */
16463 *total = 1;
16464 break;
16465 default: /* Other constants */
16466 *total = 2;
16467 break;
16468 case 0:
16469 case -1:
16470 /* Start with (MEM (SYMBOL_REF)), since that's where
16471 it'll probably end up. Add a penalty for size. */
16472 *total = (COSTS_N_INSNS (1)
16473 + (flag_pic != 0 && !TARGET_64BIT)
16474 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16475 break;
16477 return true;
16479 case ZERO_EXTEND:
16480 /* The zero extension is often completely free on x86_64, so make
16481 it as cheap as possible. */
16482 if (TARGET_64BIT && mode == DImode
16483 && GET_MODE (XEXP (x, 0)) == SImode)
16484 *total = 1;
16485 else if (TARGET_ZERO_EXTEND_WITH_AND)
16486 *total = COSTS_N_INSNS (ix86_cost->add);
16487 else
16488 *total = COSTS_N_INSNS (ix86_cost->movzx);
16489 return false;
16491 case SIGN_EXTEND:
16492 *total = COSTS_N_INSNS (ix86_cost->movsx);
16493 return false;
16495 case ASHIFT:
16496 if (GET_CODE (XEXP (x, 1)) == CONST_INT
16497 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16499 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16500 if (value == 1)
16502 *total = COSTS_N_INSNS (ix86_cost->add);
16503 return false;
16505 if ((value == 2 || value == 3)
16506 && ix86_cost->lea <= ix86_cost->shift_const)
16508 *total = COSTS_N_INSNS (ix86_cost->lea);
16509 return false;
16512 /* FALLTHRU */
16514 case ROTATE:
16515 case ASHIFTRT:
16516 case LSHIFTRT:
16517 case ROTATERT:
16518 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16520 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16522 if (INTVAL (XEXP (x, 1)) > 32)
16523 *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
16524 else
16525 *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
16527 else
16529 if (GET_CODE (XEXP (x, 1)) == AND)
16530 *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
16531 else
16532 *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
16535 else
16537 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16538 *total = COSTS_N_INSNS (ix86_cost->shift_const);
16539 else
16540 *total = COSTS_N_INSNS (ix86_cost->shift_var);
16542 return false;
16544 case MULT:
16545 if (FLOAT_MODE_P (mode))
16547 *total = COSTS_N_INSNS (ix86_cost->fmul);
16548 return false;
16550 else
16552 rtx op0 = XEXP (x, 0);
16553 rtx op1 = XEXP (x, 1);
16554 int nbits;
16555 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16557 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16558 for (nbits = 0; value != 0; value &= value - 1)
16559 nbits++;
16561 else
16562 /* This is arbitrary. */
16563 nbits = 7;
16565 /* Compute costs correctly for widening multiplication. */
16566 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16567 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16568 == GET_MODE_SIZE (mode))
16570 int is_mulwiden = 0;
16571 enum machine_mode inner_mode = GET_MODE (op0);
16573 if (GET_CODE (op0) == GET_CODE (op1))
16574 is_mulwiden = 1, op1 = XEXP (op1, 0);
16575 else if (GET_CODE (op1) == CONST_INT)
16577 if (GET_CODE (op0) == SIGN_EXTEND)
16578 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16579 == INTVAL (op1);
16580 else
16581 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16584 if (is_mulwiden)
16585 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16588 *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
16589 + nbits * ix86_cost->mult_bit)
16590 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
16592 return true;
16595 case DIV:
16596 case UDIV:
16597 case MOD:
16598 case UMOD:
16599 if (FLOAT_MODE_P (mode))
16600 *total = COSTS_N_INSNS (ix86_cost->fdiv);
16601 else
16602 *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
16603 return false;
16605 case PLUS:
16606 if (FLOAT_MODE_P (mode))
16607 *total = COSTS_N_INSNS (ix86_cost->fadd);
16608 else if (GET_MODE_CLASS (mode) == MODE_INT
16609 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16611 if (GET_CODE (XEXP (x, 0)) == PLUS
16612 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16613 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16614 && CONSTANT_P (XEXP (x, 1)))
16616 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16617 if (val == 2 || val == 4 || val == 8)
16619 *total = COSTS_N_INSNS (ix86_cost->lea);
16620 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16621 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16622 outer_code);
16623 *total += rtx_cost (XEXP (x, 1), outer_code);
16624 return true;
16627 else if (GET_CODE (XEXP (x, 0)) == MULT
16628 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16630 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16631 if (val == 2 || val == 4 || val == 8)
16633 *total = COSTS_N_INSNS (ix86_cost->lea);
16634 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16635 *total += rtx_cost (XEXP (x, 1), outer_code);
16636 return true;
16639 else if (GET_CODE (XEXP (x, 0)) == PLUS)
16641 *total = COSTS_N_INSNS (ix86_cost->lea);
16642 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16643 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16644 *total += rtx_cost (XEXP (x, 1), outer_code);
16645 return true;
16648 /* FALLTHRU */
16650 case MINUS:
16651 if (FLOAT_MODE_P (mode))
16653 *total = COSTS_N_INSNS (ix86_cost->fadd);
16654 return false;
16656 /* FALLTHRU */
16658 case AND:
16659 case IOR:
16660 case XOR:
16661 if (!TARGET_64BIT && mode == DImode)
16663 *total = (COSTS_N_INSNS (ix86_cost->add) * 2
16664 + (rtx_cost (XEXP (x, 0), outer_code)
16665 << (GET_MODE (XEXP (x, 0)) != DImode))
16666 + (rtx_cost (XEXP (x, 1), outer_code)
16667 << (GET_MODE (XEXP (x, 1)) != DImode)));
16668 return true;
16670 /* FALLTHRU */
16672 case NEG:
16673 if (FLOAT_MODE_P (mode))
16675 *total = COSTS_N_INSNS (ix86_cost->fchs);
16676 return false;
16678 /* FALLTHRU */
16680 case NOT:
16681 if (!TARGET_64BIT && mode == DImode)
16682 *total = COSTS_N_INSNS (ix86_cost->add * 2);
16683 else
16684 *total = COSTS_N_INSNS (ix86_cost->add);
16685 return false;
16687 case COMPARE:
16688 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
16689 && XEXP (XEXP (x, 0), 1) == const1_rtx
16690 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
16691 && XEXP (x, 1) == const0_rtx)
16693 /* This kind of construct is implemented using test[bwl].
16694 Treat it as if we had an AND. */
16695 *total = (COSTS_N_INSNS (ix86_cost->add)
16696 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
16697 + rtx_cost (const1_rtx, outer_code));
16698 return true;
16700 return false;
16702 case FLOAT_EXTEND:
16703 if (!TARGET_SSE_MATH
16704 || mode == XFmode
16705 || (mode == DFmode && !TARGET_SSE2))
16706 *total = 0;
16707 return false;
16709 case ABS:
16710 if (FLOAT_MODE_P (mode))
16711 *total = COSTS_N_INSNS (ix86_cost->fabs);
16712 return false;
16714 case SQRT:
16715 if (FLOAT_MODE_P (mode))
16716 *total = COSTS_N_INSNS (ix86_cost->fsqrt);
16717 return false;
16719 case UNSPEC:
16720 if (XINT (x, 1) == UNSPEC_TP)
16721 *total = 0;
16722 return false;
16724 default:
16725 return false;
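One detail of the MULT case above that is easy to miss: the cost of multiplying by a constant is scaled by the constant's population count, gathered with the classic clear-the-lowest-set-bit loop. A small standalone sketch of just that counting step (count_set_bits is an illustrative name, not a GCC routine):

#include <stdio.h>

/* Count set bits the same way the MULT case does: each iteration of
   "value &= value - 1" clears exactly one set bit.  */
static int count_set_bits (unsigned long value)
{
  int nbits = 0;
  for (; value != 0; value &= value - 1)
    nbits++;
  return nbits;
}

int main (void)
{
  /* A multiply by 0x90 (two set bits) would be charged
     mult_init + 2 * mult_bit by the code above.  */
  printf ("%d\n", count_set_bits (0x90));   /* prints 2 */
  return 0;
}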
16729 #if TARGET_MACHO
16731 static int current_machopic_label_num;
16733 /* Given a symbol name and its associated stub, write out the
16734 definition of the stub. */
16736 void
16737 machopic_output_stub (FILE *file, const char *symb, const char *stub)
16739 unsigned int length;
16740 char *binder_name, *symbol_name, lazy_ptr_name[32];
16741 int label = ++current_machopic_label_num;
16743 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
16744 symb = (*targetm.strip_name_encoding) (symb);
16746 length = strlen (stub);
16747 binder_name = alloca (length + 32);
16748 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
16750 length = strlen (symb);
16751 symbol_name = alloca (length + 32);
16752 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
16754 sprintf (lazy_ptr_name, "L%d$lz", label);
16756 if (MACHOPIC_PURE)
16757 machopic_picsymbol_stub_section ();
16758 else
16759 machopic_symbol_stub_section ();
16761 fprintf (file, "%s:\n", stub);
16762 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16764 if (MACHOPIC_PURE)
16766 fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
16767 fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
16768 fprintf (file, "\tjmp %%edx\n");
16770 else
16771 fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
16773 fprintf (file, "%s:\n", binder_name);
16775 if (MACHOPIC_PURE)
16777 fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
16778 fprintf (file, "\tpushl %%eax\n");
16780 else
16781 fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
16783 fprintf (file, "\tjmp dyld_stub_binding_helper\n");
16785 machopic_lazy_symbol_ptr_section ();
16786 fprintf (file, "%s:\n", lazy_ptr_name);
16787 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16788 fprintf (file, "\t.long %s\n", binder_name);
16790 #endif /* TARGET_MACHO */
16792 /* Order the registers for register allocator. */
16794 void
16795 x86_order_regs_for_local_alloc (void)
16797 int pos = 0;
16798 int i;
16800 /* First allocate the local general purpose registers. */
16801 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16802 if (GENERAL_REGNO_P (i) && call_used_regs[i])
16803 reg_alloc_order [pos++] = i;
16805 /* Global general purpose registers. */
16806 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16807 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
16808 reg_alloc_order [pos++] = i;
16810 /* x87 registers come first in case we are doing FP math
16811 using them. */
16812 if (!TARGET_SSE_MATH)
16813 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16814 reg_alloc_order [pos++] = i;
16816 /* SSE registers. */
16817 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16818 reg_alloc_order [pos++] = i;
16819 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16820 reg_alloc_order [pos++] = i;
16822 /* x87 registers. */
16823 if (TARGET_SSE_MATH)
16824 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16825 reg_alloc_order [pos++] = i;
16827 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16828 reg_alloc_order [pos++] = i;
16830 /* Initialize the rest of the array, as we do not allocate some registers
16831 at all. */
16832 while (pos < FIRST_PSEUDO_REGISTER)
16833 reg_alloc_order [pos++] = 0;
16836 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16837 struct attribute_spec.handler. */
16838 static tree
16839 ix86_handle_struct_attribute (tree *node, tree name,
16840 tree args ATTRIBUTE_UNUSED,
16841 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16843 tree *type = NULL;
16844 if (DECL_P (*node))
16846 if (TREE_CODE (*node) == TYPE_DECL)
16847 type = &TREE_TYPE (*node);
16849 else
16850 type = node;
16852 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16853 || TREE_CODE (*type) == UNION_TYPE)))
16855 warning (OPT_Wattributes, "%qs attribute ignored",
16856 IDENTIFIER_POINTER (name));
16857 *no_add_attrs = true;
16860 else if ((is_attribute_p ("ms_struct", name)
16861 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16862 || ((is_attribute_p ("gcc_struct", name)
16863 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16865 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
16866 IDENTIFIER_POINTER (name));
16867 *no_add_attrs = true;
16870 return NULL_TREE;
16873 static bool
16874 ix86_ms_bitfield_layout_p (tree record_type)
16876 return (TARGET_MS_BITFIELD_LAYOUT &&
16877 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
16878 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
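For context, a short piece of user code (not part of this file) showing the two attributes the handler above accepts; putting both on the same type would trigger the "incompatible attribute ignored" warning:

/* Each attribute selects the record layout rules for one particular type. */
struct __attribute__ ((ms_struct)) ms_layout
{
  char c;
  long long ll;        /* laid out with the Microsoft rules */
};

struct __attribute__ ((gcc_struct)) gcc_layout
{
  char c;
  long long ll;        /* laid out with the default GCC rules */
};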
16881 /* Returns an expression indicating where the this parameter is
16882 located on entry to the FUNCTION. */
16884 static rtx
16885 x86_this_parameter (tree function)
16887 tree type = TREE_TYPE (function);
16889 if (TARGET_64BIT)
16891 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16892 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16895 if (ix86_function_regparm (type, function) > 0)
16897 tree parm;
16899 parm = TYPE_ARG_TYPES (type);
16900 /* Figure out whether or not the function has a variable number of
16901 arguments. */
16902 for (; parm; parm = TREE_CHAIN (parm))
16903 if (TREE_VALUE (parm) == void_type_node)
16904 break;
16905 /* If not, the this parameter is in the first argument. */
16906 if (parm)
16908 int regno = 0;
16909 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16910 regno = 2;
16911 return gen_rtx_REG (SImode, regno);
16915 if (aggregate_value_p (TREE_TYPE (type), type))
16916 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16917 else
16918 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16921 /* Determine whether x86_output_mi_thunk can succeed. */
16923 static bool
16924 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16925 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16926 HOST_WIDE_INT vcall_offset, tree function)
16928 /* 64-bit can handle anything. */
16929 if (TARGET_64BIT)
16930 return true;
16932 /* For 32-bit, everything's fine if we have one free register. */
16933 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16934 return true;
16936 /* Need a free register for vcall_offset. */
16937 if (vcall_offset)
16938 return false;
16940 /* Need a free register for GOT references. */
16941 if (flag_pic && !(*targetm.binds_local_p) (function))
16942 return false;
16944 /* Otherwise ok. */
16945 return true;
16948 /* Output the assembler code for a thunk function. THUNK_DECL is the
16949 declaration for the thunk function itself, FUNCTION is the decl for
16950 the target function. DELTA is an immediate constant offset to be
16951 added to THIS. If VCALL_OFFSET is nonzero, the word at
16952 *(*this + vcall_offset) should be added to THIS. */
16954 static void
16955 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16956 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16957 HOST_WIDE_INT vcall_offset, tree function)
16959 rtx xops[3];
16960 rtx this = x86_this_parameter (function);
16961 rtx this_reg, tmp;
16963 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
16964 pull it in now and let DELTA benefit. */
16965 if (REG_P (this))
16966 this_reg = this;
16967 else if (vcall_offset)
16969 /* Put the this parameter into %eax. */
16970 xops[0] = this;
16971 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16972 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16974 else
16975 this_reg = NULL_RTX;
16977 /* Adjust the this parameter by a fixed constant. */
16978 if (delta)
16980 xops[0] = GEN_INT (delta);
16981 xops[1] = this_reg ? this_reg : this;
16982 if (TARGET_64BIT)
16984 if (!x86_64_general_operand (xops[0], DImode))
16986 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16987 xops[1] = tmp;
16988 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16989 xops[0] = tmp;
16990 xops[1] = this;
16992 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16994 else
16995 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16998 /* Adjust the this parameter by a value stored in the vtable. */
16999 if (vcall_offset)
17001 if (TARGET_64BIT)
17002 tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17003 else
17005 int tmp_regno = 2 /* ECX */;
17006 if (lookup_attribute ("fastcall",
17007 TYPE_ATTRIBUTES (TREE_TYPE (function))))
17008 tmp_regno = 0 /* EAX */;
17009 tmp = gen_rtx_REG (SImode, tmp_regno);
17012 xops[0] = gen_rtx_MEM (Pmode, this_reg);
17013 xops[1] = tmp;
17014 if (TARGET_64BIT)
17015 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17016 else
17017 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17019 /* Adjust the this parameter. */
17020 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17021 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17023 rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17024 xops[0] = GEN_INT (vcall_offset);
17025 xops[1] = tmp2;
17026 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17027 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17029 xops[1] = this_reg;
17030 if (TARGET_64BIT)
17031 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17032 else
17033 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17036 /* If necessary, drop THIS back to its stack slot. */
17037 if (this_reg && this_reg != this)
17039 xops[0] = this_reg;
17040 xops[1] = this;
17041 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17044 xops[0] = XEXP (DECL_RTL (function), 0);
17045 if (TARGET_64BIT)
17047 if (!flag_pic || (*targetm.binds_local_p) (function))
17048 output_asm_insn ("jmp\t%P0", xops);
17049 else
17051 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17052 tmp = gen_rtx_CONST (Pmode, tmp);
17053 tmp = gen_rtx_MEM (QImode, tmp);
17054 xops[0] = tmp;
17055 output_asm_insn ("jmp\t%A0", xops);
17058 else
17060 if (!flag_pic || (*targetm.binds_local_p) (function))
17061 output_asm_insn ("jmp\t%P0", xops);
17062 else
17063 #if TARGET_MACHO
17064 if (TARGET_MACHO)
17066 rtx sym_ref = XEXP (DECL_RTL (function), 0);
17067 tmp = (gen_rtx_SYMBOL_REF
17068 (Pmode,
17069 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17070 tmp = gen_rtx_MEM (QImode, tmp);
17071 xops[0] = tmp;
17072 output_asm_insn ("jmp\t%0", xops);
17074 else
17075 #endif /* TARGET_MACHO */
17077 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17078 output_set_got (tmp);
17080 xops[1] = tmp;
17081 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17082 output_asm_insn ("jmp\t{*}%1", xops);
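A plain-C sketch (not GCC code) of the pointer arithmetic the emitted thunk performs before tail-jumping to FUNCTION; adjust_this is an illustrative name, and the casts stand in for the register moves and adds generated above:

#include <stdint.h>

void *
adjust_this (void *this_ptr, intptr_t delta, intptr_t vcall_offset)
{
  char *p = (char *) this_ptr + delta;            /* constant DELTA        */
  if (vcall_offset)
    {
      char *vtable = *(char **) p;                /* *this                 */
      p += *(intptr_t *) (vtable + vcall_offset); /* + *(*this + offset)   */
    }
  return p;
}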
17087 static void
17088 x86_file_start (void)
17090 default_file_start ();
17091 if (X86_FILE_START_VERSION_DIRECTIVE)
17092 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17093 if (X86_FILE_START_FLTUSED)
17094 fputs ("\t.global\t__fltused\n", asm_out_file);
17095 if (ix86_asm_dialect == ASM_INTEL)
17096 fputs ("\t.intel_syntax\n", asm_out_file);
17100 x86_field_alignment (tree field, int computed)
17102 enum machine_mode mode;
17103 tree type = TREE_TYPE (field);
17105 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17106 return computed;
17107 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17108 ? get_inner_array_type (type) : type);
17109 if (mode == DFmode || mode == DCmode
17110 || GET_MODE_CLASS (mode) == MODE_INT
17111 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17112 return MIN (32, computed);
17113 return computed;
17116 /* Output assembler code to FILE to increment profiler label # LABELNO
17117 for profiling a function entry. */
17118 void
17119 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17121 if (TARGET_64BIT)
17122 if (flag_pic)
17124 #ifndef NO_PROFILE_COUNTERS
17125 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17126 #endif
17127 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17129 else
17131 #ifndef NO_PROFILE_COUNTERS
17132 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17133 #endif
17134 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17136 else if (flag_pic)
17138 #ifndef NO_PROFILE_COUNTERS
17139 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17140 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17141 #endif
17142 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17144 else
17146 #ifndef NO_PROFILE_COUNTERS
17147 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17148 PROFILE_COUNT_REGISTER);
17149 #endif
17150 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17154 /* We don't have exact information about the insn sizes, but we may assume
17155 quite safely that we are informed about all 1 byte insns and memory
17156 address sizes. This is enough to eliminate unnecessary padding in
17157 99% of cases. */
17159 static int
17160 min_insn_size (rtx insn)
17162 int l = 0;
17164 if (!INSN_P (insn) || !active_insn_p (insn))
17165 return 0;
17167 /* Discard alignments we've emitted and jump instructions. */
17168 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17169 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17170 return 0;
17171 if (GET_CODE (insn) == JUMP_INSN
17172 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17173 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17174 return 0;
17176 /* Important case - calls are always 5 bytes.
17177 It is common to have many calls in a row. */
17178 if (GET_CODE (insn) == CALL_INSN
17179 && symbolic_reference_mentioned_p (PATTERN (insn))
17180 && !SIBLING_CALL_P (insn))
17181 return 5;
17182 if (get_attr_length (insn) <= 1)
17183 return 1;
17185 /* For normal instructions we may rely on the sizes of addresses
17186 and the presence of a symbol to require 4 bytes of encoding.
17187 This is not the case for jumps where references are PC relative. */
17188 if (GET_CODE (insn) != JUMP_INSN)
17190 l = get_attr_length_address (insn);
17191 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17192 l = 4;
17194 if (l)
17195 return 1+l;
17196 else
17197 return 2;
17200 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
17201 window. */
17203 static void
17204 ix86_avoid_jump_misspredicts (void)
17206 rtx insn, start = get_insns ();
17207 int nbytes = 0, njumps = 0;
17208 int isjump = 0;
17210 /* Look for all minimal intervals of instructions containing 4 jumps.
17211 The intervals are bounded by START and INSN. NBYTES is the total
17212 size of instructions in the interval including INSN and not including
17213 START. When the NBYTES is smaller than 16 bytes, it is possible
17214 that the end of START and INSN ends up in the same 16byte page.
17216 The smallest offset in the page at which INSN can start is the case where START
17217 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
17218 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
17220 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17223 nbytes += min_insn_size (insn);
17224 if (dump_file)
17225 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17226 INSN_UID (insn), min_insn_size (insn));
17227 if ((GET_CODE (insn) == JUMP_INSN
17228 && GET_CODE (PATTERN (insn)) != ADDR_VEC
17229 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17230 || GET_CODE (insn) == CALL_INSN)
17231 njumps++;
17232 else
17233 continue;
17235 while (njumps > 3)
17237 start = NEXT_INSN (start);
17238 if ((GET_CODE (start) == JUMP_INSN
17239 && GET_CODE (PATTERN (start)) != ADDR_VEC
17240 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17241 || GET_CODE (start) == CALL_INSN)
17242 njumps--, isjump = 1;
17243 else
17244 isjump = 0;
17245 nbytes -= min_insn_size (start);
17247 gcc_assert (njumps >= 0);
17248 if (dump_file)
17249 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17250 INSN_UID (start), INSN_UID (insn), nbytes);
17252 if (njumps == 3 && isjump && nbytes < 16)
17254 int padsize = 15 - nbytes + min_insn_size (insn);
17256 if (dump_file)
17257 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17258 INSN_UID (insn), padsize);
17259 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
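A simplified standalone model (not the pass itself) of the condition being detected: four branches packed into fewer than 16 bytes of code. The real pass keeps a sliding START pointer, counts calls as branches, and emits a p2align rather than printing; the instruction sizes below are made up:

#include <stdio.h>

struct insn { int size; int is_jump; };

int main (void)
{
  /* Made-up instruction stream: sizes in bytes, 1 = branch or call.  */
  struct insn insns[] = { {2,1}, {1,0}, {2,1}, {3,1}, {2,1}, {5,0} };
  int n = sizeof insns / sizeof insns[0];
  int end_offset[16], jump_index[16];
  int njumps = 0, offset = 0;

  for (int i = 0; i < n; i++)
    {
      offset += insns[i].size;
      end_offset[i] = offset;
      if (!insns[i].is_jump)
        continue;
      jump_index[njumps++] = i;
      if (njumps >= 4)
        {
          int first = jump_index[njumps - 4];   /* the branch 3 back       */
          int span = end_offset[i] - end_offset[first] + insns[first].size;
          if (span < 16)
            printf ("four branches in %d bytes before insn %d\n", span, i);
        }
    }
  return 0;
}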
17264 /* AMD Athlon works faster
17265 when RET is not the destination of a conditional jump or directly preceded
17266 by another jump instruction. We avoid the penalty by inserting a NOP just
17267 before the RET instructions in such cases. */
17268 static void
17269 ix86_pad_returns (void)
17271 edge e;
17272 edge_iterator ei;
17274 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17276 basic_block bb = e->src;
17277 rtx ret = BB_END (bb);
17278 rtx prev;
17279 bool replace = false;
17281 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17282 || !maybe_hot_bb_p (bb))
17283 continue;
17284 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17285 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17286 break;
17287 if (prev && GET_CODE (prev) == CODE_LABEL)
17289 edge e;
17290 edge_iterator ei;
17292 FOR_EACH_EDGE (e, ei, bb->preds)
17293 if (EDGE_FREQUENCY (e) && e->src->index >= 0
17294 && !(e->flags & EDGE_FALLTHRU))
17295 replace = true;
17297 if (!replace)
17299 prev = prev_active_insn (ret);
17300 if (prev
17301 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17302 || GET_CODE (prev) == CALL_INSN))
17303 replace = true;
17304 /* Empty functions get a branch mispredict even when the jump destination
17305 is not visible to us. */
17306 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17307 replace = true;
17309 if (replace)
17311 emit_insn_before (gen_return_internal_long (), ret);
17312 delete_insn (ret);
17317 /* Implement machine specific optimizations. We implement padding of returns
17318 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
17319 static void
17320 ix86_reorg (void)
17322 if (TARGET_ATHLON_K8 && optimize && !optimize_size)
17323 ix86_pad_returns ();
17324 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17325 ix86_avoid_jump_misspredicts ();
17328 /* Return nonzero when a QImode register that must be represented via a REX prefix
17329 is used. */
17330 bool
17331 x86_extended_QIreg_mentioned_p (rtx insn)
17333 int i;
17334 extract_insn_cached (insn);
17335 for (i = 0; i < recog_data.n_operands; i++)
17336 if (REG_P (recog_data.operand[i])
17337 && REGNO (recog_data.operand[i]) >= 4)
17338 return true;
17339 return false;
17342 /* Return nonzero when P points to register encoded via REX prefix.
17343 Called via for_each_rtx. */
17344 static int
17345 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17347 unsigned int regno;
17348 if (!REG_P (*p))
17349 return 0;
17350 regno = REGNO (*p);
17351 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17354 /* Return true when INSN mentions register that must be encoded using REX
17355 prefix. */
17356 bool
17357 x86_extended_reg_mentioned_p (rtx insn)
17359 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17362 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
17363 optabs would emit if we didn't have TFmode patterns. */
17365 void
17366 x86_emit_floatuns (rtx operands[2])
17368 rtx neglab, donelab, i0, i1, f0, in, out;
17369 enum machine_mode mode, inmode;
17371 inmode = GET_MODE (operands[1]);
17372 gcc_assert (inmode == SImode || inmode == DImode);
17374 out = operands[0];
17375 in = force_reg (inmode, operands[1]);
17376 mode = GET_MODE (out);
17377 neglab = gen_label_rtx ();
17378 donelab = gen_label_rtx ();
17379 i1 = gen_reg_rtx (Pmode);
17380 f0 = gen_reg_rtx (mode);
17382 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17384 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17385 emit_jump_insn (gen_jump (donelab));
17386 emit_barrier ();
17388 emit_label (neglab);
17390 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17391 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17392 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17393 expand_float (f0, i0, 0);
17394 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17396 emit_label (donelab);
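The scalar equivalent of the trick expanded above may make the shift/and/or sequence easier to follow: when the value looks negative to a signed convert, halve it while keeping the low bit sticky, convert, and double the result. A standalone sketch (not GCC code):

#include <stdio.h>
#include <stdint.h>

static double u64_to_double (uint64_t u)
{
  if ((int64_t) u >= 0)
    return (double) (int64_t) u;            /* fits a signed conversion    */
  uint64_t halved = (u >> 1) | (u & 1);     /* LSHIFTRT, AND and IOR above */
  double f = (double) (int64_t) halved;
  return f + f;                             /* the final PLUS (f0, f0)     */
}

int main (void)
{
  printf ("%.1f\n", u64_to_double (0xFFFFFFFFFFFFFFFFull));
  return 0;
}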
17399 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17400 with all elements equal to VAR. Return true if successful. */
17402 static bool
17403 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17404 rtx target, rtx val)
17406 enum machine_mode smode, wsmode, wvmode;
17407 rtx x;
17409 switch (mode)
17411 case V2SImode:
17412 case V2SFmode:
17413 if (!mmx_ok)
17414 return false;
17415 /* FALLTHRU */
17417 case V2DFmode:
17418 case V2DImode:
17419 case V4SFmode:
17420 case V4SImode:
17421 val = force_reg (GET_MODE_INNER (mode), val);
17422 x = gen_rtx_VEC_DUPLICATE (mode, val);
17423 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17424 return true;
17426 case V4HImode:
17427 if (!mmx_ok)
17428 return false;
17429 if (TARGET_SSE || TARGET_3DNOW_A)
17431 val = gen_lowpart (SImode, val);
17432 x = gen_rtx_TRUNCATE (HImode, val);
17433 x = gen_rtx_VEC_DUPLICATE (mode, x);
17434 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17435 return true;
17437 else
17439 smode = HImode;
17440 wsmode = SImode;
17441 wvmode = V2SImode;
17442 goto widen;
17445 case V8QImode:
17446 if (!mmx_ok)
17447 return false;
17448 smode = QImode;
17449 wsmode = HImode;
17450 wvmode = V4HImode;
17451 goto widen;
17452 case V8HImode:
17453 smode = HImode;
17454 wsmode = SImode;
17455 wvmode = V4SImode;
17456 goto widen;
17457 case V16QImode:
17458 smode = QImode;
17459 wsmode = HImode;
17460 wvmode = V8HImode;
17461 goto widen;
17462 widen:
17463 /* Replicate the value once into the next wider mode and recurse. */
17464 val = convert_modes (wsmode, smode, val, true);
17465 x = expand_simple_binop (wsmode, ASHIFT, val,
17466 GEN_INT (GET_MODE_BITSIZE (smode)),
17467 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17468 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17470 x = gen_reg_rtx (wvmode);
17471 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17472 gcc_unreachable ();
17473 emit_move_insn (target, gen_lowpart (mode, x));
17474 return true;
17476 default:
17477 return false;
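The "widen" path above builds a broadcast for narrow elements by first duplicating the scalar into the next wider integer mode, then recursing. A standalone sketch of that shift-and-or duplication for a byte value (not GCC code):

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint8_t  b = 0xAB;
  uint16_t h = (uint16_t) ((uint16_t) b | ((uint16_t) b << 8));  /* 0xABAB */
  uint32_t s = (uint32_t) h | ((uint32_t) h << 16);        /* 0xABABABAB   */
  printf ("0x%04X 0x%08X\n", (unsigned) h, (unsigned) s);
  return 0;
}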
17481 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17482 whose low element is VAR, and other elements are zero. Return true
17483 if successful. */
17485 static bool
17486 ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17487 rtx target, rtx var)
17489 enum machine_mode vsimode;
17490 rtx x;
17492 switch (mode)
17494 case V2SFmode:
17495 case V2SImode:
17496 if (!mmx_ok)
17497 return false;
17498 /* FALLTHRU */
17500 case V2DFmode:
17501 case V2DImode:
17502 var = force_reg (GET_MODE_INNER (mode), var);
17503 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17504 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17505 return true;
17507 case V4SFmode:
17508 case V4SImode:
17509 var = force_reg (GET_MODE_INNER (mode), var);
17510 x = gen_rtx_VEC_DUPLICATE (mode, var);
17511 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17512 emit_insn (gen_rtx_SET (VOIDmode, target, x));
17513 return true;
17515 case V8HImode:
17516 case V16QImode:
17517 vsimode = V4SImode;
17518 goto widen;
17519 case V4HImode:
17520 case V8QImode:
17521 if (!mmx_ok)
17522 return false;
17523 vsimode = V2SImode;
17524 goto widen;
17525 widen:
17526 /* Zero extend the variable element to SImode and recurse. */
17527 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17529 x = gen_reg_rtx (vsimode);
17530 if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17531 gcc_unreachable ();
17533 emit_move_insn (target, gen_lowpart (mode, x));
17534 return true;
17536 default:
17537 return false;
17541 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
17542 consisting of the values in VALS. It is known that all elements
17543 except ONE_VAR are constants. Return true if successful. */
17545 static bool
17546 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17547 rtx target, rtx vals, int one_var)
17549 rtx var = XVECEXP (vals, 0, one_var);
17550 enum machine_mode wmode;
17551 rtx const_vec, x;
17553 const_vec = copy_rtx (vals);
17554 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17555 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17557 switch (mode)
17559 case V2DFmode:
17560 case V2DImode:
17561 case V2SFmode:
17562 case V2SImode:
17563 /* For the two element vectors, it's just as easy to use
17564 the general case. */
17565 return false;
17567 case V4SFmode:
17568 case V4SImode:
17569 case V8HImode:
17570 case V4HImode:
17571 break;
17573 case V16QImode:
17574 wmode = V8HImode;
17575 goto widen;
17576 case V8QImode:
17577 wmode = V4HImode;
17578 goto widen;
17579 widen:
17580 /* There's no way to set one QImode entry easily. Combine
17581 the variable value with its adjacent constant value, and
17582 promote to an HImode set. */
17583 x = XVECEXP (vals, 0, one_var ^ 1);
17584 if (one_var & 1)
17586 var = convert_modes (HImode, QImode, var, true);
17587 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17588 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17589 x = GEN_INT (INTVAL (x) & 0xff);
17591 else
17593 var = convert_modes (HImode, QImode, var, true);
17594 x = gen_int_mode (INTVAL (x) << 8, HImode);
17596 if (x != const0_rtx)
17597 var = expand_simple_binop (HImode, IOR, var, x, var,
17598 1, OPTAB_LIB_WIDEN);
17600 x = gen_reg_rtx (wmode);
17601 emit_move_insn (x, gen_lowpart (wmode, const_vec));
17602 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17604 emit_move_insn (target, gen_lowpart (mode, x));
17605 return true;
17607 default:
17608 return false;
17611 emit_move_insn (target, const_vec);
17612 ix86_expand_vector_set (mmx_ok, target, var, one_var);
17613 return true;
17616 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
17617 all values variable, and none identical. */
17619 static void
17620 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17621 rtx target, rtx vals)
17623 enum machine_mode half_mode = GET_MODE_INNER (mode);
17624 rtx op0 = NULL, op1 = NULL;
17625 bool use_vec_concat = false;
17627 switch (mode)
17629 case V2SFmode:
17630 case V2SImode:
17631 if (!mmx_ok && !TARGET_SSE)
17632 break;
17633 /* FALLTHRU */
17635 case V2DFmode:
17636 case V2DImode:
17637 /* For the two element vectors, we always implement VEC_CONCAT. */
17638 op0 = XVECEXP (vals, 0, 0);
17639 op1 = XVECEXP (vals, 0, 1);
17640 use_vec_concat = true;
17641 break;
17643 case V4SFmode:
17644 half_mode = V2SFmode;
17645 goto half;
17646 case V4SImode:
17647 half_mode = V2SImode;
17648 goto half;
17649 half:
17651 rtvec v;
17653 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
17654 Recurse to load the two halves. */
17656 op0 = gen_reg_rtx (half_mode);
17657 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
17658 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
17660 op1 = gen_reg_rtx (half_mode);
17661 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
17662 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
17664 use_vec_concat = true;
17666 break;
17668 case V8HImode:
17669 case V16QImode:
17670 case V4HImode:
17671 case V8QImode:
17672 break;
17674 default:
17675 gcc_unreachable ();
17678 if (use_vec_concat)
17680 if (!register_operand (op0, half_mode))
17681 op0 = force_reg (half_mode, op0);
17682 if (!register_operand (op1, half_mode))
17683 op1 = force_reg (half_mode, op1);
17685 emit_insn (gen_rtx_SET (VOIDmode, target,
17686 gen_rtx_VEC_CONCAT (mode, op0, op1)));
17688 else
17690 int i, j, n_elts, n_words, n_elt_per_word;
17691 enum machine_mode inner_mode;
17692 rtx words[4], shift;
17694 inner_mode = GET_MODE_INNER (mode);
17695 n_elts = GET_MODE_NUNITS (mode);
17696 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
17697 n_elt_per_word = n_elts / n_words;
17698 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
17700 for (i = 0; i < n_words; ++i)
17702 rtx word = NULL_RTX;
17704 for (j = 0; j < n_elt_per_word; ++j)
17706 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
17707 elt = convert_modes (word_mode, inner_mode, elt, true);
17709 if (j == 0)
17710 word = elt;
17711 else
17713 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
17714 word, 1, OPTAB_LIB_WIDEN);
17715 word = expand_simple_binop (word_mode, IOR, word, elt,
17716 word, 1, OPTAB_LIB_WIDEN);
17720 words[i] = word;
17723 if (n_words == 1)
17724 emit_move_insn (target, gen_lowpart (mode, words[0]));
17725 else if (n_words == 2)
17727 rtx tmp = gen_reg_rtx (mode);
17728 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
17729 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
17730 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
17731 emit_move_insn (target, tmp);
17733 else if (n_words == 4)
17735 rtx tmp = gen_reg_rtx (V4SImode);
17736 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
17737 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
17738 emit_move_insn (target, gen_lowpart (mode, tmp));
17740 else
17741 gcc_unreachable ();
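The fallback path above packs vector elements into word-sized integers, most significant element first, so that on a little-endian target the lowest-numbered element ends up in the low bits. A standalone sketch with two 16-bit elements and made-up values (not GCC code):

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint16_t elts[2] = { 0x1111, 0x2222 };   /* elements 0 and 1 of one word */
  uint32_t word = 0;
  int j;

  for (j = 0; j < 2; j++)
    {
      uint16_t elt = elts[2 - j - 1];      /* take the high element first  */
      word = (j == 0) ? elt : (word << 16) | elt;
    }
  printf ("0x%08X\n", (unsigned) word);    /* prints 0x22221111            */
  return 0;
}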
17745 /* Initialize vector TARGET via VALS. Suppress the use of MMX
17746 instructions unless MMX_OK is true. */
17748 void
17749 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
17751 enum machine_mode mode = GET_MODE (target);
17752 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17753 int n_elts = GET_MODE_NUNITS (mode);
17754 int n_var = 0, one_var = -1;
17755 bool all_same = true, all_const_zero = true;
17756 int i;
17757 rtx x;
17759 for (i = 0; i < n_elts; ++i)
17761 x = XVECEXP (vals, 0, i);
17762 if (!CONSTANT_P (x))
17763 n_var++, one_var = i;
17764 else if (x != CONST0_RTX (inner_mode))
17765 all_const_zero = false;
17766 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
17767 all_same = false;
17770 /* Constants are best loaded from the constant pool. */
17771 if (n_var == 0)
17773 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
17774 return;
17777 /* If all values are identical, broadcast the value. */
17778 if (all_same
17779 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
17780 XVECEXP (vals, 0, 0)))
17781 return;
17783 /* Values where only one field is non-constant are best loaded from
17784 the pool and overwritten via move later. */
17785 if (n_var == 1)
17787 if (all_const_zero && one_var == 0
17788 && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
17789 XVECEXP (vals, 0, 0)))
17790 return;
17792 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
17793 return;
17796 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
17799 void
17800 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
17802 enum machine_mode mode = GET_MODE (target);
17803 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17804 bool use_vec_merge = false;
17805 rtx tmp;
17807 switch (mode)
17809 case V2SFmode:
17810 case V2SImode:
17811 if (mmx_ok)
17813 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
17814 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
17815 if (elt == 0)
17816 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
17817 else
17818 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
17819 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17820 return;
17822 break;
17824 case V2DFmode:
17825 case V2DImode:
17827 rtx op0, op1;
17829 /* For the two element vectors, we implement a VEC_CONCAT with
17830 the extraction of the other element. */
17832 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
17833 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
17835 if (elt == 0)
17836 op0 = val, op1 = tmp;
17837 else
17838 op0 = tmp, op1 = val;
17840 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
17841 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17843 return;
17845 case V4SFmode:
17846 switch (elt)
17848 case 0:
17849 use_vec_merge = true;
17850 break;
17852 case 1:
17853 /* tmp = target = A B C D */
17854 tmp = copy_to_reg (target);
17855 /* target = A A B B */
17856 emit_insn (gen_sse_unpcklps (target, target, target));
17857 /* target = X A B B */
17858 ix86_expand_vector_set (false, target, val, 0);
17859 /* target = A X C D */
17860 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17861 GEN_INT (1), GEN_INT (0),
17862 GEN_INT (2+4), GEN_INT (3+4)));
17863 return;
17865 case 2:
17866 /* tmp = target = A B C D */
17867 tmp = copy_to_reg (target);
17868 /* tmp = X B C D */
17869 ix86_expand_vector_set (false, tmp, val, 0);
17870 /* target = A B X D */
17871 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17872 GEN_INT (0), GEN_INT (1),
17873 GEN_INT (0+4), GEN_INT (3+4)));
17874 return;
17876 case 3:
17877 /* tmp = target = A B C D */
17878 tmp = copy_to_reg (target);
17879 /* tmp = X B C D */
17880 ix86_expand_vector_set (false, tmp, val, 0);
17881 /* target = A B X D */
17882 emit_insn (gen_sse_shufps_1 (target, target, tmp,
17883 GEN_INT (0), GEN_INT (1),
17884 GEN_INT (2+4), GEN_INT (0+4)));
17885 return;
17887 default:
17888 gcc_unreachable ();
17890 break;
17892 case V4SImode:
17893 /* Element 0 handled by vec_merge below. */
17894 if (elt == 0)
17896 use_vec_merge = true;
17897 break;
17900 if (TARGET_SSE2)
17902 /* With SSE2, use integer shuffles to swap element 0 and ELT,
17903 store into element 0, then shuffle them back. */
17905 rtx order[4];
17907 order[0] = GEN_INT (elt);
17908 order[1] = const1_rtx;
17909 order[2] = const2_rtx;
17910 order[3] = GEN_INT (3);
17911 order[elt] = const0_rtx;
17913 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17914 order[1], order[2], order[3]));
17916 ix86_expand_vector_set (false, target, val, 0);
17918 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17919 order[1], order[2], order[3]));
17921 else
17923 /* For SSE1, we have to reuse the V4SF code. */
17924 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
17925 gen_lowpart (SFmode, val), elt);
17927 return;
17929 case V8HImode:
17930 use_vec_merge = TARGET_SSE2;
17931 break;
17932 case V4HImode:
17933 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17934 break;
17936 case V16QImode:
17937 case V8QImode:
17938 default:
17939 break;
17942 if (use_vec_merge)
17944 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
17945 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
17946 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17948 else
17950 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17952 emit_move_insn (mem, target);
17954 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17955 emit_move_insn (tmp, val);
17957 emit_move_insn (target, mem);
17961 void
17962 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
17964 enum machine_mode mode = GET_MODE (vec);
17965 enum machine_mode inner_mode = GET_MODE_INNER (mode);
17966 bool use_vec_extr = false;
17967 rtx tmp;
17969 switch (mode)
17971 case V2SImode:
17972 case V2SFmode:
17973 if (!mmx_ok)
17974 break;
17975 /* FALLTHRU */
17977 case V2DFmode:
17978 case V2DImode:
17979 use_vec_extr = true;
17980 break;
17982 case V4SFmode:
17983 switch (elt)
17985 case 0:
17986 tmp = vec;
17987 break;
17989 case 1:
17990 case 3:
17991 tmp = gen_reg_rtx (mode);
17992 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
17993 GEN_INT (elt), GEN_INT (elt),
17994 GEN_INT (elt+4), GEN_INT (elt+4)));
17995 break;
17997 case 2:
17998 tmp = gen_reg_rtx (mode);
17999 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18000 break;
18002 default:
18003 gcc_unreachable ();
18005 vec = tmp;
18006 use_vec_extr = true;
18007 elt = 0;
18008 break;
18010 case V4SImode:
18011 if (TARGET_SSE2)
18013 switch (elt)
18015 case 0:
18016 tmp = vec;
18017 break;
18019 case 1:
18020 case 3:
18021 tmp = gen_reg_rtx (mode);
18022 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18023 GEN_INT (elt), GEN_INT (elt),
18024 GEN_INT (elt), GEN_INT (elt)));
18025 break;
18027 case 2:
18028 tmp = gen_reg_rtx (mode);
18029 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18030 break;
18032 default:
18033 gcc_unreachable ();
18035 vec = tmp;
18036 use_vec_extr = true;
18037 elt = 0;
18039 else
18041 /* For SSE1, we have to reuse the V4SF code. */
18042 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18043 gen_lowpart (V4SFmode, vec), elt);
18044 return;
18046 break;
18048 case V8HImode:
18049 use_vec_extr = TARGET_SSE2;
18050 break;
18051 case V4HImode:
18052 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18053 break;
18055 case V16QImode:
18056 case V8QImode:
18057 /* ??? Could extract the appropriate HImode element and shift. */
18058 default:
18059 break;
18062 if (use_vec_extr)
18064 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18065 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18067 /* Let the rtl optimizers know about the zero extension performed. */
18068 if (inner_mode == HImode)
18070 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18071 target = gen_lowpart (SImode, target);
18074 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18076 else
18078 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18080 emit_move_insn (mem, vec);
18082 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
18083 emit_move_insn (target, tmp);
18087 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
18088 pattern to reduce; DEST is the destination; IN is the input vector. */
18090 void
18091 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18093 rtx tmp1, tmp2, tmp3;
18095 tmp1 = gen_reg_rtx (V4SFmode);
18096 tmp2 = gen_reg_rtx (V4SFmode);
18097 tmp3 = gen_reg_rtx (V4SFmode);
18099 emit_insn (gen_sse_movhlps (tmp1, in, in));
18100 emit_insn (fn (tmp2, tmp1, in));
18102 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18103 GEN_INT (1), GEN_INT (1),
18104 GEN_INT (1+4), GEN_INT (1+4)));
18105 emit_insn (fn (dest, tmp2, tmp3));
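A scalar illustration (not GCC code) of the reduction shape above, using addition as the binary operation: folding the high half onto the low half and then the two remaining lanes reduces four values with two applications of FN:

#include <stdio.h>

int main (void)
{
  float v[4] = { 1.0f, 2.0f, 3.0f, 4.0f };

  /* Step 1: movhlps + FN folds lanes 2,3 onto lanes 0,1.  */
  float a = v[0] + v[2];
  float b = v[1] + v[3];

  /* Step 2: shufps + FN folds the two remaining lanes.  */
  float result = a + b;

  printf ("%.1f\n", result);   /* prints 10.0 */
  return 0;
}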
18108 /* Implements target hook vector_mode_supported_p. */
18109 static bool
18110 ix86_vector_mode_supported_p (enum machine_mode mode)
18112 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18113 return true;
18114 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18115 return true;
18116 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18117 return true;
18118 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18119 return true;
18120 return false;
18123 /* Worker function for TARGET_MD_ASM_CLOBBERS.
18125 We do this in the new i386 backend to maintain source compatibility
18126 with the old cc0-based compiler. */
18128 static tree
18129 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18130 tree inputs ATTRIBUTE_UNUSED,
18131 tree clobbers)
18133 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18134 clobbers);
18135 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18136 clobbers);
18137 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18138 clobbers);
18139 return clobbers;
18142 /* Return true if this goes in large data/bss. */
18144 static bool
18145 ix86_in_large_data_p (tree exp)
18147 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18148 return false;
18150 /* Functions are never large data. */
18151 if (TREE_CODE (exp) == FUNCTION_DECL)
18152 return false;
18154 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18156 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18157 if (strcmp (section, ".ldata") == 0
18158 || strcmp (section, ".lbss") == 0)
18159 return true;
18160 return false;
18162 else
18164 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18166 /* If this is an incomplete type with size 0, then we can't put it
18167 in data because it might be too big when completed. */
18168 if (!size || size > ix86_section_threshold)
18169 return true;
18172 return false;
18174 static void
18175 ix86_encode_section_info (tree decl, rtx rtl, int first)
18177 default_encode_section_info (decl, rtl, first);
18179 if (TREE_CODE (decl) == VAR_DECL
18180 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18181 && ix86_in_large_data_p (decl))
18182 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18185 /* Worker function for REVERSE_CONDITION. */
18187 enum rtx_code
18188 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18190 return (mode != CCFPmode && mode != CCFPUmode
18191 ? reverse_condition (code)
18192 : reverse_condition_maybe_unordered (code));
18195 /* Output code to perform an x87 FP register move, from OPERANDS[1]
18196 to OPERANDS[0]. */
18198 const char *
18199 output_387_reg_move (rtx insn, rtx *operands)
18201 if (REG_P (operands[1])
18202 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18204 if (REGNO (operands[0]) == FIRST_STACK_REG
18205 && TARGET_USE_FFREEP)
18206 return "ffreep\t%y0";
18207 return "fstp\t%y0";
18209 if (STACK_TOP_P (operands[0]))
18210 return "fld%z1\t%y1";
18211 return "fst\t%y0";
18214 /* Output code to perform a conditional jump to LABEL, if C2 flag in
18215 FP status register is set. */
18217 void
18218 ix86_emit_fp_unordered_jump (rtx label)
18220 rtx reg = gen_reg_rtx (HImode);
18221 rtx temp;
18223 emit_insn (gen_x86_fnstsw_1 (reg));
18225 if (TARGET_USE_SAHF)
18227 emit_insn (gen_x86_sahf_1 (reg));
18229 temp = gen_rtx_REG (CCmode, FLAGS_REG);
18230 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18232 else
18234 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18236 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18237 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18240 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18241 gen_rtx_LABEL_REF (VOIDmode, label),
18242 pc_rtx);
18243 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18244 emit_jump_insn (temp);
18247 /* Output code to perform a log1p XFmode calculation. */
18249 void ix86_emit_i387_log1p (rtx op0, rtx op1)
18251 rtx label1 = gen_label_rtx ();
18252 rtx label2 = gen_label_rtx ();
18254 rtx tmp = gen_reg_rtx (XFmode);
18255 rtx tmp2 = gen_reg_rtx (XFmode);
18257 emit_insn (gen_absxf2 (tmp, op1));
18258 emit_insn (gen_cmpxf (tmp,
18259 CONST_DOUBLE_FROM_REAL_VALUE (
18260 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18261 XFmode)));
18262 emit_jump_insn (gen_bge (label1));
18264 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18265 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18266 emit_jump (label2);
18268 emit_label (label1);
18269 emit_move_insn (tmp, CONST1_RTX (XFmode));
18270 emit_insn (gen_addxf3 (tmp, op1, tmp));
18271 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18272 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18274 emit_label (label2);
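A rough scalar sketch (not GCC code) of the branch above: the threshold 0.29289... is 1 - sqrt(2)/2, and ln(2) is supplied as the Y operand so the base-2 x87 logarithm instructions produce a natural log. The library log2 call here cannot reproduce the extra precision of fyl2xp1, which takes X directly instead of forming 1 + X:

#include <stdio.h>
#include <math.h>

static double log1p_sketch (double x)
{
  const double ln2 = 0.69314718055994530942;

  if (fabs (x) < 0.29289321881345247561)    /* 1 - sqrt(2)/2                */
    return ln2 * log2 (x + 1.0);            /* stands in for fyl2xp1        */
  return ln2 * log2 (1.0 + x);              /* stands in for fyl2x on 1 + x */
}

int main (void)
{
  printf ("%.17g vs %.17g\n", log1p_sketch (1e-9), log1p (1e-9));
  return 0;
}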
18277 /* Solaris named-section hook. Parameters are as for
18278 named_section_real. */
18280 static void
18281 i386_solaris_elf_named_section (const char *name, unsigned int flags,
18282 tree decl)
18284 /* With Binutils 2.15, the "@unwind" marker must be specified on
18285 every occurrence of the ".eh_frame" section, not just the first
18286 one. */
18287 if (TARGET_64BIT
18288 && strcmp (name, ".eh_frame") == 0)
18290 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18291 flags & SECTION_WRITE ? "aw" : "a");
18292 return;
18294 default_elf_asm_named_section (name, flags, decl);
18297 /* Return the mangling of TYPE if it is an extended fundamental type. */
18299 static const char *
18300 ix86_mangle_fundamental_type (tree type)
18302 switch (TYPE_MODE (type))
18304 case TFmode:
18305 /* __float128 is "g". */
18306 return "g";
18307 case XFmode:
18308 /* "long double" or __float80 is "e". */
18309 return "e";
18310 default:
18311 return NULL;
18315 /* For 32-bit code we can save PIC register setup by using
18316 __stack_chk_fail_local hidden function instead of calling
18317 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
18318 register, so it is better to call __stack_chk_fail directly. */
18320 static tree
18321 ix86_stack_protect_fail (void)
18323 return TARGET_64BIT
18324 ? default_external_stack_protect_fail ()
18325 : default_hidden_stack_protect_fail ();
18328 /* Select a format to encode pointers in exception handling data. CODE
18329 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
18330 true if the symbol may be affected by dynamic relocations.
18332 ??? All x86 object file formats are capable of representing this.
18333 After all, the relocation needed is the same as for the call insn.
18334 Whether or not a particular assembler allows us to enter such, I
18335 guess we'll have to see. */
18337 asm_preferred_eh_data_format (int code, int global)
18339 if (flag_pic)
18341 int type = DW_EH_PE_sdata8;
18342 if (!TARGET_64BIT
18343 || ix86_cmodel == CM_SMALL_PIC
18344 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18345 type = DW_EH_PE_sdata4;
18346 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18348 if (ix86_cmodel == CM_SMALL
18349 || (ix86_cmodel == CM_MEDIUM && code))
18350 return DW_EH_PE_udata4;
18351 return DW_EH_PE_absptr;
18354 #include "gt-i386.h"