/* Auxiliary functions for expanding the cpymem, setmem, cmpmem,
   load_multiple and store_multiple patterns of the Andes NDS32 cpu
   for the GNU compiler
   Copyright (C) 2012-2025 Free Software Foundation, Inc.
   Contributed by Andes Technology Corporation.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
/* ------------------------------------------------------------------------ */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "explow.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "nds32-protos.h"

/* ------------------------------------------------------------------------ */

/* Auxiliary static function definitions.  */
/* Emit a load from MEM+OFFSET into REG if LOAD_P, otherwise a store of
   REG to MEM+OFFSET, using machine mode MODE.  */
static void
nds32_emit_load_store (rtx reg, rtx mem,
                       enum machine_mode mode,
                       int offset, bool load_p)
{
  rtx new_mem;
  new_mem = adjust_address (mem, mode, offset);
  if (load_p)
    emit_move_insn (reg, new_mem);
  else
    emit_move_insn (new_mem, reg);
}
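/* For illustration: with MODE == QImode, OFFSET == 1 and LOAD_P == true,
   the helper above loads one byte from the address one past MEM's base,
   e.g. roughly

       lbi  $reg, [$base + 1]

   (the exact instruction is chosen later by the move patterns).  */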
/* Emit a post-increment load or store: access one MODE-sized datum at
   *BASE_REG, then advance BASE_REG by the access size.  */
static void
nds32_emit_post_inc_load_store (rtx reg, rtx base_reg,
                                enum machine_mode mode,
                                bool load_p)
{
  gcc_assert (GET_MODE (reg) == mode);
  gcc_assert (GET_MODE (base_reg) == Pmode);

  /* Do not gen (set (reg) (mem (post_inc (reg)))) directly here since it
     may not be recognized by gcc, so let gcc combine it at the
     auto_inc_dec pass.  */
  if (load_p)
    emit_move_insn (reg,
                    gen_rtx_MEM (mode, base_reg));
  else
    emit_move_insn (gen_rtx_MEM (mode, base_reg),
                    reg);

  emit_move_insn (base_reg,
                  plus_constant (Pmode, base_reg, GET_MODE_SIZE (mode)));
}
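/* For illustration: in QImode a call with LOAD_P == false emits the pair

       (set (mem:QI (reg base)) (reg value))
       (set (reg base) (plus (reg base) (const_int 1)))

   which the auto_inc_dec pass can then fuse into a single
   post-increment store such as sbi.bi.  */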
/* Copy one MODE-sized piece of data from SRC+ADDR_OFFSET to
   DST+ADDR_OFFSET through a fresh temporary register.  */
static void
nds32_emit_mem_move (rtx src, rtx dst,
                     enum machine_mode mode,
                     int addr_offset)
{
  gcc_assert (MEM_P (src) && MEM_P (dst));

  rtx tmp_reg = gen_reg_rtx (mode);

  nds32_emit_load_store (tmp_reg, src, mode,
                         addr_offset, /* load_p */ true);
  nds32_emit_load_store (tmp_reg, dst, mode,
                         addr_offset, /* load_p */ false);
}
/* Move COUNT words from *SRC_MEM to *DST_MEM through the consecutive
   registers starting at BASE_REGNO.  If UPDATE_BASE_REG_P, rewrite the
   base registers and MEMs to point past the moved block.  */
static void
nds32_emit_mem_move_block (int base_regno, int count,
                           rtx *dst_base_reg, rtx *dst_mem,
                           rtx *src_base_reg, rtx *src_mem,
                           bool update_base_reg_p)
{
  rtx new_base_reg;

  emit_insn (nds32_expand_load_multiple (base_regno, count,
                                         *src_base_reg, *src_mem,
                                         update_base_reg_p, &new_base_reg));
  if (update_base_reg_p)
    {
      *src_base_reg = new_base_reg;
      *src_mem = gen_rtx_MEM (SImode, *src_base_reg);
    }

  emit_insn (nds32_expand_store_multiple (base_regno, count,
                                          *dst_base_reg, *dst_mem,
                                          update_base_reg_p, &new_base_reg));

  if (update_base_reg_p)
    {
      *dst_base_reg = new_base_reg;
      *dst_mem = gen_rtx_MEM (SImode, *dst_base_reg);
    }
}
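/* For illustration: a COUNT == 2 move with UPDATE_BASE_REG_P emits roughly

       lmw.bim $rN, [$src], $rN+1   ! load 8 bytes, post-update $src
       smw.bim $rN, [$dst], $rN+1   ! store 8 bytes, post-update $dst

   where $rN is the register numbered BASE_REGNO.  */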
/* ------------------------------------------------------------------------ */

/* Auxiliary functions for expanding the cpymem pattern.  */
static bool
nds32_expand_cpymemsi_loop_unknown_size (rtx dstmem, rtx srcmem,
                                         rtx size,
                                         rtx alignment)
{
  /* Emit loop version of cpymem.

       andi    $size_least_3_bit, $size, #~7
       add     $dst_end, $dst, $size
       move    $dst_itr, $dst
       move    $src_itr, $src
       beqz    $size_least_3_bit, .Lbyte_mode_entry ! Not large enough.
       add     $double_word_end, $dst, $size_least_3_bit

     .Ldouble_word_mode_loop:
       lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
       smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
       ! these moves will be deleted after register allocation
       move    $src_itr, $src_itr'
       move    $dst_itr, $dst_itr'
       ! Not reached upper bound.  Loop.
       bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop

     .Lbyte_mode_entry:
       beq     $dst_itr, $dst_end, .Lend_label
     .Lbyte_mode_loop:
       lbi.bi  $tmp, [$src_itr], #1
       sbi.bi  $tmp, [$dst_itr], #1
       ! Not reached upper bound.  Loop.
       bne     $dst_itr, $dst_end, .Lbyte_mode_loop
     .Lend_label:  */
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx size_least_3_bit;
  rtx double_word_end;
  rtx double_word_mode_loop, byte_mode_entry, byte_mode_loop, end_label;
  rtx tmp;
  rtx mask_least_3_bit;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;

  /* V3M's lmw/smw cannot handle unaligned access; give up unless the
     shared alignment is known to be at least 4 bytes.  */
  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);
  mask_least_3_bit = GEN_INT (~7);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_entry = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();
  end_label = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
  /* andi    $size_least_3_bit, $size, #~7 */
  size_least_3_bit = expand_binop (SImode, and_optab, size, mask_least_3_bit,
                                   NULL_RTX, 0, OPTAB_WIDEN);
  /* add     $dst_end, $dst, $size */
  dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                          NULL_RTX, 0, OPTAB_WIDEN);

  /* move    $dst_itr, $dst
     move    $src_itr, $src */
  emit_move_insn (dst_itr, dst_base_reg);
  emit_move_insn (src_itr, src_base_reg);

  /* beqz    $size_least_3_bit, .Lbyte_mode_entry ! Not large enough. */
  emit_cmp_and_jump_insns (size_least_3_bit, const0_rtx, EQ, NULL,
                           SImode, 1, byte_mode_entry);
  /* add     $double_word_end, $dst, $size_least_3_bit */
  double_word_end = expand_binop (Pmode, add_optab,
                                  dst_base_reg, size_least_3_bit,
                                  NULL_RTX, 0, OPTAB_WIDEN);

  /* .Ldouble_word_mode_loop: */
  emit_label (double_word_mode_loop);
  /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
     smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
  src_itr_m = src_itr;
  dst_itr_m = dst_itr;
  srcmem_m = srcmem;
  dstmem_m = dstmem;
  nds32_emit_mem_move_block (start_regno, 2,
                             &dst_itr_m, &dstmem_m,
                             &src_itr_m, &srcmem_m,
                             true);
  /* move    $src_itr, $src_itr'
     move    $dst_itr, $dst_itr' */
  emit_move_insn (dst_itr, dst_itr_m);
  emit_move_insn (src_itr, src_itr_m);

  /* ! Not reached upper bound.  Loop.
     bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
  emit_cmp_and_jump_insns (double_word_end, dst_itr, NE, NULL,
                           Pmode, 1, double_word_mode_loop);
  /* .Lbyte_mode_entry: */
  emit_label (byte_mode_entry);

  /* beq     $dst_itr, $dst_end, .Lend_label */
  emit_cmp_and_jump_insns (dst_itr, dst_end, EQ, NULL,
                           Pmode, 1, end_label);
  /* .Lbyte_mode_loop: */
  emit_label (byte_mode_loop);

  /* lbi.bi  $tmp, [$src_itr], #1 */
  nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
  /* sbi.bi  $tmp, [$dst_itr], #1 */
  nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
  /* ! Not reached upper bound.  Loop.
     bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
  emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
                           SImode, 1, byte_mode_loop);

  /* .Lend_label: */
  emit_label (end_label);

  return true;
}
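/* Worked example: for SIZE == 23, size_least_3_bit == 16, so the
   double-word loop above runs twice (2 registers x 4 bytes == 8 bytes
   per iteration) and the byte-mode loop then copies the remaining
   7 bytes.  */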
static bool
nds32_expand_cpymemsi_loop_known_size (rtx dstmem, rtx srcmem,
                                       rtx size, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx dst_itr, src_itr;
  rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
  rtx dst_end;
  rtx double_word_mode_loop, byte_mode_loop;
  rtx tmp;
  int start_regno;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);

  /* As in the unknown-size case, V3M cannot do unaligned lmw/smw.  */
  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return false;

  if (TARGET_REDUCED_REGS)
    start_regno = 2;
  else
    start_regno = 16;

  dst_itr = gen_reg_rtx (Pmode);
  src_itr = gen_reg_rtx (Pmode);
  dst_end = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (QImode);

  double_word_mode_loop = gen_label_rtx ();
  byte_mode_loop = gen_label_rtx ();

  dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));

  if (total_bytes < 8)
    {
      /* Emit loop version of cpymem for total_bytes less than 8.

           add     $dst_end, $dst, $size
           move    $dst_itr, $dst
         .Lbyte_mode_loop:
           lbi.bi  $tmp, [$src_itr], #1
           sbi.bi  $tmp, [$dst_itr], #1
           ! Not reached upper bound.  Loop.
           bne     $dst_itr, $dst_end, .Lbyte_mode_loop */

      /* add     $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                              NULL_RTX, 0, OPTAB_WIDEN);
      /* move    $dst_itr, $dst
         move    $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Lbyte_mode_loop: */
      emit_label (byte_mode_loop);

      /* lbi.bi  $tmp, [$src_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
      /* sbi.bi  $tmp, [$dst_itr], #1 */
      nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
      /* ! Not reached upper bound.  Loop.
         bne     $dst_itr, $dst_end, .Lbyte_mode_loop */
      emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
                               SImode, 1, byte_mode_loop);
    }
  else if (total_bytes % 8 == 0)
    {
      /* Emit loop version of cpymem for total_bytes a multiple of 8.

           add     $dst_end, $dst, $size
           move    $dst_itr, $dst
           move    $src_itr, $src

         .Ldouble_word_mode_loop:
           lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
           smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
           ! these moves will be deleted after register allocation
           move    $src_itr, $src_itr'
           move    $dst_itr, $dst_itr'
           ! Not reached upper bound.  Loop.
           bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */

      /* add     $dst_end, $dst, $size */
      dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
                              NULL_RTX, 0, OPTAB_WIDEN);

      /* move    $dst_itr, $dst
         move    $src_itr, $src */
      emit_move_insn (dst_itr, dst_base_reg);
      emit_move_insn (src_itr, src_base_reg);

      /* .Ldouble_word_mode_loop: */
      emit_label (double_word_mode_loop);
      /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
         smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
      src_itr_m = src_itr;
      dst_itr_m = dst_itr;
      srcmem_m = srcmem;
      dstmem_m = dstmem;
      nds32_emit_mem_move_block (start_regno, 2,
                                 &dst_itr_m, &dstmem_m,
                                 &src_itr_m, &srcmem_m,
                                 true);
      /* move    $src_itr, $src_itr'
         move    $dst_itr, $dst_itr' */
      emit_move_insn (dst_itr, dst_itr_m);
      emit_move_insn (src_itr, src_itr_m);

      /* ! Not reached upper bound.  Loop.
         bne     $double_word_end, $dst_itr, .Ldouble_word_mode_loop */
      emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
                               Pmode, 1, double_word_mode_loop);
    }
  else
    {
      /* Handle size greater than 8, and not a multiple of 8.  */
      return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem,
                                                      size, alignment);
    }

  return true;
}
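/* Dispatch examples: SIZE == 5 takes the byte-mode loop, SIZE == 24 takes
   the double-word loop, and SIZE == 19 (greater than 8 but not a multiple
   of 8) falls back to the unknown-size expansion above.  */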
static bool
nds32_expand_cpymemsi_loop (rtx dstmem, rtx srcmem,
                            rtx size, rtx alignment)
{
  if (CONST_INT_P (size))
    return nds32_expand_cpymemsi_loop_known_size (dstmem, srcmem,
                                                  size, alignment);
  else
    return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem,
                                                    size, alignment);
}
static bool
nds32_expand_cpymemsi_unroll (rtx dstmem, rtx srcmem,
                              rtx total_bytes, rtx alignment)
{
  rtx dst_base_reg, src_base_reg;
  rtx tmp_reg;
  int maximum_bytes;
  int maximum_bytes_per_inst;
  int maximum_regs;
  int start_regno;
  int i, inst_num;
  HOST_WIDE_INT remain_bytes, remain_words;
  bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
  bool align_to_2_bytes = (INTVAL (alignment) & 1) == 0;

  /* Because the reduced register set has few registers
     (r0~r5, r6~r10, r15, r28~r31, where 'r15' and 'r28~r31'
     cannot be used for register allocation),
     using 8 registers (32 bytes) for moving a memory block
     may easily consume all of them.
     That makes register allocation/spilling hard to work.
     So we only allow a maximum of 4 registers (16 bytes per
     instruction) for moving memory blocks under reduced-set
     registers.  */
  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      /* $r25 is $tp, so we only use up to 8 registers.  */
      maximum_regs  = 8;
      maximum_bytes = 160;
      start_regno   = 16;
    }
  maximum_bytes_per_inst = maximum_regs * UNITS_PER_WORD;

  /* 1. Total_bytes is integer for sure.
     2. Alignment is integer for sure.
     3. Maximum 4 or 8 registers and up to 4 or 5 instructions:
        4 regs * 4 bytes * 4 insns = 64 bytes,
        8 regs * 4 bytes * 5 insns = 160 bytes.
     4. The dstmem cannot be volatile memory access.
     5. The srcmem cannot be volatile memory access.
     6. Reject a shared alignment not known to be 4-byte on v3m, since
        lmw/smw do *NOT* support unaligned access in the v3m
        configuration.  */
  if (GET_CODE (total_bytes) != CONST_INT
      || GET_CODE (alignment) != CONST_INT
      || INTVAL (total_bytes) > maximum_bytes
      || MEM_VOLATILE_P (dstmem)
      || MEM_VOLATILE_P (srcmem)
      || (TARGET_ISA_V3M && !align_to_4_bytes))
    return false;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));
  src_base_reg = copy_to_mode_reg (SImode, XEXP (srcmem, 0));
  remain_bytes = INTVAL (total_bytes);

  /* Do not update the base address for the last lmw/smw pair.  */
  inst_num = ((INTVAL (total_bytes) + (maximum_bytes_per_inst - 1))
              / maximum_bytes_per_inst) - 1;
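  /* Worked example (full register set): for total_bytes == 100,
     maximum_bytes_per_inst == 32, so inst_num == (100 + 31) / 32 - 1 == 3.
     The three lmw/smw pairs below move 96 bytes with base updates, and
     the 4-byte tail is handled afterwards without a final base update.  */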
  for (i = 0; i < inst_num; i++)
    {
      nds32_emit_mem_move_block (start_regno, maximum_regs,
                                 &dst_base_reg, &dstmem,
                                 &src_base_reg, &srcmem,
                                 true);
    }
  remain_bytes -= maximum_bytes_per_inst * inst_num;

  remain_words = remain_bytes / UNITS_PER_WORD;
  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_words != 0)
    {
      if (remain_bytes != 0)
        nds32_emit_mem_move_block (start_regno, remain_words,
                                   &dst_base_reg, &dstmem,
                                   &src_base_reg, &srcmem,
                                   true);
      else
        {
          /* Do not update the address if there is no further byte to
             move.  */
          if (remain_words == 1)
            {
              /* Emit a single move instruction if aligned to 4 bytes
                 and only 1 word to move.  */
              if (align_to_4_bytes)
                nds32_emit_mem_move (srcmem, dstmem, SImode, 0);
              else
                {
                  tmp_reg = gen_reg_rtx (SImode);
                  emit_insn (
                    gen_unaligned_load_w (tmp_reg,
                                          gen_rtx_MEM (SImode,
                                                       src_base_reg)));
                  emit_insn (
                    gen_unaligned_store_w (gen_rtx_MEM (SImode,
                                                        dst_base_reg),
                                           tmp_reg));
                }
            }
          else
            nds32_emit_mem_move_block (start_regno, remain_words,
                                       &dst_base_reg, &dstmem,
                                       &src_base_reg, &srcmem,
                                       false);
        }
    }

  switch (remain_bytes)
    {
    case 3:
    case 2:
      {
        if (align_to_2_bytes)
          nds32_emit_mem_move (srcmem, dstmem, HImode, 0);
        else
          {
            nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
            nds32_emit_mem_move (srcmem, dstmem, QImode, 1);
          }

        if (remain_bytes == 3)
          nds32_emit_mem_move (srcmem, dstmem, QImode, 2);
        break;
      }
    case 1:
      nds32_emit_mem_move (srcmem, dstmem, QImode, 0);
      break;
    case 0:
      break;
    default:
      gcc_unreachable ();
    }

  /* Successfully created patterns, return true.  */
  return true;
}
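/* Tail examples for the switch above: with remain_bytes == 3 and 2-byte
   alignment it emits one HImode move plus one QImode move; with only
   byte alignment it emits three QImode moves.  */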
/* Function to move a block of memory content by using load_multiple and
   store_multiple.
   This is an auxiliary extern function to help create rtx templates.
   Check the nds32-multiple.md file for the patterns.  */
bool
nds32_expand_cpymemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
{
  if (nds32_expand_cpymemsi_unroll (dstmem, srcmem, total_bytes, alignment))
    return true;

  if (!optimize_size && optimize > 2)
    return nds32_expand_cpymemsi_loop (dstmem, srcmem, total_bytes, alignment);

  return false;
}
/* ------------------------------------------------------------------------ */

/* Auxiliary functions for expanding the setmem pattern.  */
/* Broadcast the QImode (or constant) VALUE into all four bytes of the
   SImode register VALUE4WORD.  */
static void
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  if (CONST_INT_P (value))
    {
      unsigned HOST_WIDE_INT val = UINTVAL (value) & GET_MODE_MASK (QImode);
      rtx new_val = gen_int_mode (val | (val << 8)
                                  | (val << 16) | (val << 24), SImode);
      /* Just calculate it here if it's a constant value.  */
      emit_move_insn (value4word, new_val);
    }
  else
    {
      if (NDS32_EXT_DSP_P ())
        {
          /* ! prepare word
             insb    $tmp, $value, 1           ! $tmp        <- 0x0000abab
             pkbb16  $value4word, $tmp, $tmp   ! $value4word <- 0xabababab */
          rtx tmp = gen_reg_rtx (SImode);

          convert_move (tmp, value, true);

          emit_insn (
            gen_insvsi_internal (tmp, gen_int_mode (0x8, SImode), tmp));

          emit_insn (gen_pkbbsi_1 (value4word, tmp, tmp));
        }
      else
        {
          /* ! prepare word
             andi    $tmp1, $value, 0xff       ! $tmp1       <- 0x000000ab
             slli    $tmp2, $tmp1, 8           ! $tmp2       <- 0x0000ab00
             or      $tmp3, $tmp1, $tmp2       ! $tmp3       <- 0x0000abab
             slli    $tmp4, $tmp3, 16          ! $tmp4       <- 0xabab0000
             or      $val4word, $tmp3, $tmp4   ! $value4word <- 0xabababab */

          rtx tmp1, tmp2, tmp3, tmp4;
          tmp1 = expand_binop (SImode, and_optab, value,
                               gen_int_mode (0xff, SImode),
                               NULL_RTX, 0, OPTAB_WIDEN);
          tmp2 = expand_binop (SImode, ashl_optab, tmp1,
                               gen_int_mode (8, SImode),
                               NULL_RTX, 0, OPTAB_WIDEN);
          tmp3 = expand_binop (SImode, ior_optab, tmp1, tmp2,
                               NULL_RTX, 0, OPTAB_WIDEN);
          tmp4 = expand_binop (SImode, ashl_optab, tmp3,
                               gen_int_mode (16, SImode),
                               NULL_RTX, 0, OPTAB_WIDEN);

          emit_insn (gen_iorsi3 (value4word, tmp3, tmp4));
        }
    }
}
/* Return an SImode register with VALUE duplicated into all four bytes.  */
static rtx
nds32_gen_dup_4_byte_to_word_value (rtx value)
{
  rtx value4word = gen_reg_rtx (SImode);
  nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);

  return value4word;
}
/* Return a DImode register with VALUE duplicated into all eight bytes.  */
static rtx
nds32_gen_dup_8_byte_to_double_word_value (rtx value)
{
  rtx value4doubleword = gen_reg_rtx (DImode);

  nds32_gen_dup_4_byte_to_word_value_aux (
    value, nds32_di_low_part_subreg (value4doubleword));

  emit_move_insn (nds32_di_high_part_subreg (value4doubleword),
                  nds32_di_low_part_subreg (value4doubleword));
  return value4doubleword;
}
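/* For illustration: duplicating VALUE == 0xab yields 0xabababab in the
   low word, which is then copied into the high word, so an 8-byte store
   of the DImode register writes the byte pattern
   ab ab ab ab ab ab ab ab.  */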
/* Emit a double-word setmem loop that stores the DImode register VALUE
   at ITR and advances ITR, for as many whole 8-byte chunks as SIZE
   contains.  Return an SImode register holding the remaining byte count
   (SIZE & 7).  */
static rtx
emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
{
  rtx word_mode_label = gen_label_rtx ();
  rtx word_mode_end_label = gen_label_rtx ();
  rtx byte_mode_size = gen_reg_rtx (SImode);
  rtx byte_mode_size_tmp = gen_reg_rtx (SImode);
  rtx word_mode_end = gen_reg_rtx (SImode);
  rtx size_for_word = gen_reg_rtx (SImode);

  /* and     $size_for_word, $size, #~0x7 */
  size_for_word = expand_binop (SImode, and_optab, size,
                                gen_int_mode (~0x7, SImode),
                                NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (byte_mode_size, size);

  /* beqz    $size_for_word, .Lword_mode_end */
  emit_cmp_and_jump_insns (size_for_word, const0_rtx, EQ, NULL,
                           SImode, 1, word_mode_end_label);
  /* add     $word_mode_end, $dst, $size_for_word */
  word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
                                NULL_RTX, 0, OPTAB_WIDEN);

  /* andi    $byte_mode_size, $size, 0x7 */
  byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
                                     NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (byte_mode_size, byte_mode_size_tmp);

  /* .Lword_mode: */
  emit_label (word_mode_label);
  /* ! word-mode set loop
     smw.bim $value4word, [$dst_itr], $value4word, 0
     bne     $word_mode_end, $dst_itr, .Lword_mode */
  emit_insn (gen_unaligned_store_update_base_dw (itr, itr, value));
  emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
                           Pmode, 1, word_mode_label);

  emit_label (word_mode_end_label);

  return byte_mode_size;
}
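/* Worked example: for SIZE == 21, size_for_word == 16, so the word-mode
   loop emits two 8-byte stores and the function returns
   byte_mode_size == 5 for the caller's byte-mode loop.  */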
/* Emit a byte setmem loop that stores the QImode VALUE at ITR, SIZE
   times.  If NEED_END, return a register holding the end address
   (computed even when the loop is skipped); otherwise return
   NULL_RTX.  */
static rtx
emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
{
  rtx end = gen_reg_rtx (Pmode);
  rtx byte_mode_label = gen_label_rtx ();
  rtx end_label = gen_label_rtx ();

  value = force_reg (QImode, value);

  if (need_end)
    end = expand_binop (Pmode, add_optab, itr, size,
                        NULL_RTX, 0, OPTAB_WIDEN);
  /* beqz    $byte_mode_size, .Lend
     add     $byte_mode_end, $dst_itr, $byte_mode_size */
  emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL,
                           SImode, 1, end_label);

  if (!need_end)
    end = expand_binop (Pmode, add_optab, itr, size,
                        NULL_RTX, 0, OPTAB_WIDEN);

  /* .Lbyte_mode: */
  emit_label (byte_mode_label);

  /* ! byte-mode set loop
     sbi.bi  $value, [$dst_itr], 1
     bne     $byte_mode_end, $dst_itr, .Lbyte_mode */
  nds32_emit_post_inc_load_store (value, itr, QImode, false);
  emit_cmp_and_jump_insns (end, itr, NE, NULL,
                           Pmode, 1, byte_mode_label);

  emit_label (end_label);

  if (need_end)
    return end;
  else
    return NULL_RTX;
}
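/* Design note: when NEED_END is true the end address is computed before
   the beqz, so the caller receives a valid end pointer even if SIZE is
   zero and the loop body never runs.  */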
static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
  rtx value4doubleword;
  rtx value4byte;
  rtx dst;
  rtx byte_mode_size;

  /* Emit loop version of setmem.
     memset:
       ! prepare word
       andi    $tmp1, $val, 0xff         ! $tmp1       <- 0x000000ab
       slli    $tmp2, $tmp1, 8           ! $tmp2       <- 0x0000ab00
       or      $tmp3, $val, $tmp2        ! $tmp3       <- 0x0000abab
       slli    $tmp4, $tmp3, 16          ! $tmp4       <- 0xabab0000
       or      $val4word, $tmp3, $tmp4   ! $value4word <- 0xabababab

       and     $size_for_word, $size, #-4
       beqz    $size_for_word, .Lword_mode_end

       add     $word_mode_end, $dst, $size_for_word
       andi    $byte_mode_size, $size, 3

     .Lword_mode:
       ! word-mode set loop
       smw.bim $value4word, [$dst], $value4word, 0
       bne     $word_mode_end, $dst, .Lword_mode

     .Lword_mode_end:
       beqz    $byte_mode_size, .Lend
       add     $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
       ! byte-mode set loop
       sbi.bi  $value4word, [$dst], 1
       bne     $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */

  dst = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  /* ! prepare word
     andi    $tmp1, $value, 0xff       ! $tmp1       <- 0x000000ab
     slli    $tmp2, $tmp1, 8           ! $tmp2       <- 0x0000ab00
     or      $tmp3, $tmp1, $tmp2      ! $tmp3       <- 0x0000abab
     slli    $tmp4, $tmp3, 16          ! $tmp4       <- 0xabab0000
     or      $val4word, $tmp3, $tmp4   ! $value4word <- 0xabababab */
  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);

  /* and     $size_for_word, $size, #-4
     beqz    $size_for_word, .Lword_mode_end

     add     $word_mode_end, $dst, $size_for_word
     andi    $byte_mode_size, $size, 3

     .Lword_mode:
     ! word-mode set loop
     smw.bim $value4word, [$dst], $value4word, 0
     bne     $word_mode_end, $dst, .Lword_mode
     .Lword_mode_end: */
  byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);

  /* beqz    $byte_mode_size, .Lend
     add     $byte_mode_end, $dst, $byte_mode_size

     .Lbyte_mode:
     ! byte-mode set loop
     sbi.bi  $value, [$dst], 1
     bne     $byte_mode_end, $dst, .Lbyte_mode
     .Lend: */
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
                                    subreg_lowpart_offset (QImode, DImode));

  emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);

  return true;
}
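/* Worked example: a 14-byte memset with VALUE == 0xab duplicates the
   byte into a double word, stores 8 bytes in the double-word loop
   (14 & ~7 == 8), and sets the remaining 6 bytes in the byte-mode
   loop.  */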
static bool
nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
{
  rtx base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
  rtx need_align_bytes = gen_reg_rtx (SImode);
  rtx last_2_bit = gen_reg_rtx (SImode);
  rtx byte_loop_base = gen_reg_rtx (SImode);
  rtx byte_loop_size = gen_reg_rtx (SImode);
  rtx remain_size = gen_reg_rtx (SImode);
  rtx new_base_reg;
  rtx value4byte, value4doubleword;
  rtx byte_mode_size;
  rtx last_byte_loop_label = gen_label_rtx ();

  size = force_reg (SImode, size);

  value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
  value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
                                    subreg_lowpart_offset (QImode, DImode));

  emit_move_insn (byte_loop_size, size);
  emit_move_insn (byte_loop_base, base_reg);

  /* Jump to the last byte loop if size is less than 16.  */
  emit_cmp_and_jump_insns (size, gen_int_mode (16, SImode), LE, NULL,
                           SImode, 1, last_byte_loop_label);

  /* Make sure we align to 4 bytes first since v3m can't do unaligned
     access.  */
  emit_insn (gen_andsi3 (last_2_bit,
                         base_reg,
                         gen_int_mode (0x3, SImode)));

  emit_insn (gen_subsi3 (need_align_bytes,
                         gen_int_mode (4, SImode),
                         last_2_bit));

  /* Align to 4 bytes.  */
  new_base_reg = emit_setmem_byte_loop (base_reg,
                                        need_align_bytes,
                                        value4byte,
                                        true);

  /* Calculate the remaining size.  */
  emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));

  /* Set memory double word by double word.  */
  byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
                                                remain_size,
                                                value4doubleword);

  emit_move_insn (byte_loop_base, new_base_reg);
  emit_move_insn (byte_loop_size, byte_mode_size);

  emit_label (last_byte_loop_label);

  /* And set memory for the remaining bytes.  */
  emit_setmem_byte_loop (byte_loop_base, byte_loop_size, value4byte, false);

  return true;
}
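/* Alignment example: for a destination address ending in binary ...01,
   last_2_bit == 1 and need_align_bytes == 3, so three bytes are set
   individually before the double-word loop continues on a 4-byte
   boundary.  */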
static bool
nds32_expand_setmem_unroll (rtx dstmem, rtx size, rtx value,
                            rtx align ATTRIBUTE_UNUSED,
                            rtx expected_align ATTRIBUTE_UNUSED,
                            rtx expected_size ATTRIBUTE_UNUSED)
{
  unsigned maximum_regs, maximum_bytes, start_regno, regno;
  rtx value4word;
  rtx dst_base_reg, new_base_reg;
  unsigned HOST_WIDE_INT remain_bytes, remain_words, prepare_regs, fill_per_smw;
  unsigned HOST_WIDE_INT real_size;

  if (TARGET_REDUCED_REGS)
    {
      maximum_regs  = 4;
      maximum_bytes = 64;
      start_regno   = 2;
    }
  else
    {
      maximum_regs  = 8;
      maximum_bytes = 160;
      start_regno   = 16;
    }

  real_size = UINTVAL (size) & GET_MODE_MASK (SImode);

  if (!(CONST_INT_P (size) && real_size <= maximum_bytes))
    return false;

  remain_bytes = real_size;

  gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));

  value4word = nds32_gen_dup_4_byte_to_word_value (value);

  prepare_regs = remain_bytes / UNITS_PER_WORD;

  dst_base_reg = copy_to_mode_reg (SImode, XEXP (dstmem, 0));

  if (prepare_regs > maximum_regs)
    prepare_regs = maximum_regs;

  fill_per_smw = prepare_regs * UNITS_PER_WORD;

  regno = start_regno;
  switch (prepare_regs)
    {
    case 2:
    default:
      {
        rtx reg0 = gen_rtx_REG (SImode, regno);
        rtx reg1 = gen_rtx_REG (SImode, regno + 1);
        unsigned last_regno = start_regno + prepare_regs - 1;

        emit_move_insn (reg0, value4word);
        emit_move_insn (reg1, value4word);
        rtx regd = gen_rtx_REG (DImode, regno);
        regno += 2;

        /* Try to utilize movd44!  */
        while (regno <= last_regno)
          {
            if ((regno + 1) <= last_regno)
              {
                rtx reg = gen_rtx_REG (DImode, regno);
                emit_move_insn (reg, regd);
                regno += 2;
              }
            else
              {
                rtx reg = gen_rtx_REG (SImode, regno);
                emit_move_insn (reg, reg0);
                regno += 1;
              }
          }
        break;
      }
    case 1:
      {
        rtx reg = gen_rtx_REG (SImode, regno++);
        emit_move_insn (reg, value4word);
        break;
      }
    case 0:
      break;
    }

  if (fill_per_smw)
    for (; remain_bytes >= fill_per_smw; remain_bytes -= fill_per_smw)
      {
        emit_insn (nds32_expand_store_multiple (start_regno, prepare_regs,
                                                dst_base_reg, dstmem,
                                                true, &new_base_reg));
        dst_base_reg = new_base_reg;
        dstmem = gen_rtx_MEM (SImode, dst_base_reg);
      }

  remain_words = remain_bytes / UNITS_PER_WORD;

  if (remain_words)
    {
      emit_insn (nds32_expand_store_multiple (start_regno, remain_words,
                                              dst_base_reg, dstmem,
                                              true, &new_base_reg));
      dst_base_reg = new_base_reg;
      dstmem = gen_rtx_MEM (SImode, dst_base_reg);
    }

  remain_bytes = remain_bytes - (remain_words * UNITS_PER_WORD);

  if (remain_bytes)
    {
      value = simplify_gen_subreg (QImode, value4word, SImode,
                                   subreg_lowpart_offset (QImode, SImode));

      int offset = 0;
      for (; remain_bytes; --remain_bytes, ++offset)
        nds32_emit_load_store (value, dstmem, QImode, offset, false);
    }

  return true;
}
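/* Worked example: for SIZE == 25 with the full register set,
   prepare_regs == min (25 / 4, 8) == 6, so one smw.bim stores 24 bytes
   from six broadcast registers and the final byte is written by a
   single QImode store at offset 0 of the updated destination.  */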
bool
nds32_expand_setmem (rtx dstmem, rtx size, rtx value, rtx align,
                     rtx expected_align,
                     rtx expected_size)
{
  bool align_to_4_bytes = (INTVAL (align) & 3) == 0;

  /* Only expand at -O3.  */
  if (optimize_size || optimize < 3)
    return false;

  if (TARGET_ISA_V3M && !align_to_4_bytes)
    return nds32_expand_setmem_loop_v3m (dstmem, size, value);

  if (nds32_expand_setmem_unroll (dstmem, size, value,
                                  align, expected_align, expected_size))
    return true;

  return nds32_expand_setmem_loop (dstmem, size, value);
}
/* ------------------------------------------------------------------------ */

/* Auxiliary function for expanding the strlen pattern.  */
bool
nds32_expand_strlen (rtx result, rtx str,
                     rtx target_char, rtx align ATTRIBUTE_UNUSED)
{
  rtx base_reg, backup_base_reg;
  rtx ffb_result;
  rtx target_char_ptr, length;
  rtx loop_label, tmp;

  if (optimize_size || optimize < 3)
    return false;

  gcc_assert (MEM_P (str));
  gcc_assert (CONST_INT_P (target_char) || REG_P (target_char));

  base_reg = copy_to_mode_reg (SImode, XEXP (str, 0));
  loop_label = gen_label_rtx ();

  ffb_result = gen_reg_rtx (Pmode);
  tmp = gen_reg_rtx (SImode);
  backup_base_reg = gen_reg_rtx (SImode);

  /* Emit loop version of strlen.
       move  $backup_base, $base
     .Lloop:
       lmw.bim $tmp, [$base], $tmp, 0
       ffb   $ffb_result, $tmp, $target_char   ! is there $target_char?
       beqz  $ffb_result, .Lloop
       add   $last_char_ptr, $base, $ffb_result
       sub   $length, $last_char_ptr, $backup_base */

  /* move  $backup_base, $base */
  emit_move_insn (backup_base_reg, base_reg);

  /* .Lloop: */
  emit_label (loop_label);
  /* lmw.bim $tmp, [$base], $tmp, 0 */
  emit_insn (gen_unaligned_load_update_base_w (base_reg, tmp, base_reg));

  /* ffb   $ffb_result, $tmp, $target_char   ! is there $target_char? */
  emit_insn (gen_unspec_ffb (ffb_result, tmp, target_char));

  /* beqz  $ffb_result, .Lloop */
  emit_cmp_and_jump_insns (ffb_result, const0_rtx, EQ, NULL,
                           SImode, 1, loop_label);

  /* add   $target_char_ptr, $base, $ffb_result */
  target_char_ptr = expand_binop (Pmode, add_optab, base_reg,
                                  ffb_result, NULL_RTX, 0, OPTAB_WIDEN);

  /* sub   $length, $target_char_ptr, $backup_base */
  length = expand_binop (Pmode, sub_optab, target_char_ptr,
                         backup_base_reg, NULL_RTX, 0, OPTAB_WIDEN);

  emit_move_insn (result, length);

  return true;
}
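/* Usage note: ffb scans the four bytes loaded by each lmw.bim for
   TARGET_CHAR (the NUL byte in the strlen case), so the loop walks the
   string one word at a time; the final add/sub turns the hit position
   into a length relative to the saved start address.  */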
/* ------------------------------------------------------------------------ */

/* Functions to expand load_multiple and store_multiple.
   They are auxiliary extern functions to help create rtx templates.
   Check the nds32-multiple.md file for the patterns.  */
rtx
nds32_expand_load_multiple (int base_regno, int count,
                            rtx base_addr, rtx basemem,
                            bool update_base_reg_p,
                            rtx *update_base_reg)
{
  int par_index;
  int offset;
  int start_idx;
  rtx result;
  rtx new_addr, mem, reg;

  /* Generate an unaligned load to prevent the load instruction from
     being pulled out of the parallel; otherwise it would become lwi and
     lose the unaligned access.  */
  if (count == 1)
    {
      reg = gen_rtx_REG (SImode, base_regno);
      if (update_base_reg_p)
        {
          *update_base_reg = gen_reg_rtx (SImode);
          return gen_unaligned_load_update_base_w (*update_base_reg, reg,
                                                   base_addr);
        }
      else
        return gen_unaligned_load_w (reg, gen_rtx_MEM (SImode, base_addr));
    }

  /* Create the pattern that is presented in nds32-multiple.md.  */
  if (update_base_reg_p)
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
      start_idx = 1;
    }
  else
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
      start_idx = 0;
    }

  if (update_base_reg_p)
    {
      offset = count * 4;
      new_addr = plus_constant (Pmode, base_addr, offset);
      *update_base_reg = gen_reg_rtx (SImode);

      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
    }

  for (par_index = 0; par_index < count; par_index++)
    {
      offset = par_index * 4;
      /* 4-byte for loading data to each register.  */
      new_addr = plus_constant (Pmode, base_addr, offset);
      mem = adjust_automodify_address_nv (basemem, SImode,
                                          new_addr, offset);
      reg = gen_rtx_REG (SImode, base_regno + par_index);

      XVECEXP (result, 0, (par_index + start_idx)) = gen_rtx_SET (reg, mem);
    }

  return result;
}
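/* Shape example: COUNT == 2 with a base update builds a three-element
   PARALLEL, roughly

       (parallel [(set (reg update_base) (plus (reg base) (const_int 8)))
                  (set (reg rN)   (mem (reg base)))
                  (set (reg rN+1) (mem (plus (reg base) (const_int 4))))])

   and nds32_expand_store_multiple below builds the mirrored
   (set (mem ...) (reg ...)) form.  */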
rtx
nds32_expand_store_multiple (int base_regno, int count,
                             rtx base_addr, rtx basemem,
                             bool update_base_reg_p,
                             rtx *update_base_reg)
{
  int par_index;
  int offset;
  int start_idx;
  rtx result;
  rtx new_addr, mem, reg;

  /* As for the load case, generate an unaligned store so the store
     instruction is not pulled out of the parallel and turned into swi.  */
  if (count == 1)
    {
      reg = gen_rtx_REG (SImode, base_regno);
      if (update_base_reg_p)
        {
          *update_base_reg = gen_reg_rtx (SImode);
          return gen_unaligned_store_update_base_w (*update_base_reg,
                                                    base_addr, reg);
        }
      else
        return gen_unaligned_store_w (gen_rtx_MEM (SImode, base_addr), reg);
    }

  /* Create the pattern that is presented in nds32-multiple.md.  */

  if (update_base_reg_p)
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count + 1));
      start_idx = 1;
    }
  else
    {
      result = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
      start_idx = 0;
    }

  if (update_base_reg_p)
    {
      offset = count * 4;
      new_addr = plus_constant (Pmode, base_addr, offset);
      *update_base_reg = gen_reg_rtx (SImode);

      XVECEXP (result, 0, 0) = gen_rtx_SET (*update_base_reg, new_addr);
    }

  for (par_index = 0; par_index < count; par_index++)
    {
      offset = par_index * 4;
      /* 4-byte for storing data to memory.  */
      new_addr = plus_constant (Pmode, base_addr, offset);
      mem = adjust_automodify_address_nv (basemem, SImode,
                                          new_addr, offset);
      reg = gen_rtx_REG (SImode, base_regno + par_index);

      XVECEXP (result, 0, par_index + start_idx) = gen_rtx_SET (mem, reg);
    }

  return result;
}
/* ------------------------------------------------------------------------ */