1 /* memcpy for the Visium processor.
3 Copyright (c) 2015 Rolls-Royce Controls and Data Services Limited.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of Rolls-Royce Controls and Data Services Limited nor
15 the names of its contributors may be used to endorse or promote products
16 derived from this software without specific prior written permission.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28 THE POSSIBILITY OF SUCH DAMAGE. */
30 /* This file must be kept in sync with libgcc/config/visium/memcpy.c */
34 #include "../../string/local.h"
36 #define INST_BARRIER __asm__ __volatile__ ("":::"memory");
38 #define MOVE_32_OBJECTS(in,out) \
117 #define MOVE_16_OBJECTS(in,out) \
160 #define MOVE_12_OBJECTS(in,out) \
194 #define MOVE_11_OBJECTS(in,out) \
226 #define MOVE_10_OBJECTS(in,out) \
255 #define MOVE_9_OBJECTS(in,out) \
282 #define MOVE_8_OBJECTS(in,out) \
307 #define MOVE_7_OBJECTS(in,out) \
330 #define MOVE_6_OBJECTS(in,out) \
352 #define MOVE_5_OBJECTS(in,out) \
372 #define MOVE_4_OBJECTS(in,out) \
388 #define MOVE_3_OBJECTS(in,out) \
402 #define MOVE_2_OBJECTS(in,out) \
414 #define MOVE_1_OBJECT(in,out) \
426 __inhibit_loop_to_libcall
427 __int_memcpy (void *__restrict s1
, const void *__restrict s2
, size_t n
)
436 /* This code currently gives a stall for any value with a 1->2 in the low 5
437 bits, i.e. 1,2, 33,34 ? not acceptable! */
438 switch (value
& 0x1f)
443 MOVE_1_OBJECT (in
, out
);
446 MOVE_2_OBJECTS (in
, out
);
449 MOVE_3_OBJECTS (in
, out
);
452 MOVE_4_OBJECTS (in
, out
);
455 MOVE_5_OBJECTS (in
, out
);
458 MOVE_6_OBJECTS (in
, out
);
461 MOVE_7_OBJECTS (in
, out
);
464 MOVE_8_OBJECTS (in
, out
);
467 MOVE_9_OBJECTS (in
, out
);
470 MOVE_10_OBJECTS (in
, out
);
473 MOVE_11_OBJECTS (in
, out
);
476 MOVE_12_OBJECTS (in
, out
);
479 MOVE_9_OBJECTS (in
, out
);
480 MOVE_4_OBJECTS (in
, out
);
483 MOVE_12_OBJECTS (in
, out
);
484 MOVE_2_OBJECTS (in
, out
);
487 MOVE_11_OBJECTS (in
, out
);
488 MOVE_4_OBJECTS (in
, out
);
491 MOVE_16_OBJECTS (in
, out
);
494 MOVE_11_OBJECTS (in
, out
);
495 MOVE_6_OBJECTS (in
, out
);
498 MOVE_9_OBJECTS (in
, out
);
499 MOVE_9_OBJECTS (in
, out
);
502 MOVE_16_OBJECTS (in
, out
);
503 MOVE_3_OBJECTS (in
, out
);
506 MOVE_16_OBJECTS (in
, out
);
507 MOVE_4_OBJECTS (in
, out
);
510 MOVE_16_OBJECTS (in
, out
);
511 MOVE_5_OBJECTS (in
, out
);
514 MOVE_16_OBJECTS (in
, out
);
515 MOVE_6_OBJECTS (in
, out
);
518 MOVE_16_OBJECTS (in
, out
);
519 MOVE_7_OBJECTS (in
, out
);
522 MOVE_16_OBJECTS (in
, out
);
523 MOVE_8_OBJECTS (in
, out
);
526 MOVE_16_OBJECTS (in
, out
);
527 MOVE_9_OBJECTS (in
, out
);
530 MOVE_16_OBJECTS (in
, out
);
531 MOVE_10_OBJECTS (in
, out
);
534 MOVE_16_OBJECTS (in
, out
);
535 MOVE_11_OBJECTS (in
, out
);
538 MOVE_16_OBJECTS (in
, out
);
539 MOVE_8_OBJECTS (in
, out
);
540 MOVE_4_OBJECTS (in
, out
);
543 MOVE_16_OBJECTS (in
, out
);
544 MOVE_9_OBJECTS (in
, out
);
545 MOVE_4_OBJECTS (in
, out
);
548 MOVE_16_OBJECTS (in
, out
);
549 MOVE_12_OBJECTS (in
, out
);
550 MOVE_2_OBJECTS (in
, out
);
553 MOVE_16_OBJECTS (in
, out
);
554 MOVE_11_OBJECTS (in
, out
);
555 MOVE_4_OBJECTS (in
, out
);
559 /* This loop governs the asymptotic behaviour of this algorithm, for long
562 for (loop_var
= 0; loop_var
< count
; loop_var
++)
563 MOVE_32_OBJECTS (in
, out
);
567 __inhibit_loop_to_libcall
568 __shrt_int_memcpy (void *__restrict s1
, const void *__restrict s2
, size_t n
)
572 const short int *in
= s2
;
577 /* This code currently gives a stall for any value with a 1->2 in the low 5
578 bits, i.e. 1,2, 33,34 ? not acceptable! */
579 switch (value
& 0x1f)
584 MOVE_1_OBJECT (in
, out
);
587 MOVE_2_OBJECTS (in
, out
);
590 MOVE_3_OBJECTS (in
, out
);
593 MOVE_4_OBJECTS (in
, out
);
596 MOVE_5_OBJECTS (in
, out
);
599 MOVE_6_OBJECTS (in
, out
);
602 MOVE_7_OBJECTS (in
, out
);
605 MOVE_8_OBJECTS (in
, out
);
608 MOVE_9_OBJECTS (in
, out
);
611 MOVE_10_OBJECTS (in
, out
);
614 MOVE_11_OBJECTS (in
, out
);
617 MOVE_12_OBJECTS (in
, out
);
620 MOVE_9_OBJECTS (in
, out
);
621 MOVE_4_OBJECTS (in
, out
);
624 MOVE_12_OBJECTS (in
, out
);
625 MOVE_2_OBJECTS (in
, out
);
628 MOVE_11_OBJECTS (in
, out
);
629 MOVE_4_OBJECTS (in
, out
);
632 MOVE_16_OBJECTS (in
, out
);
635 MOVE_11_OBJECTS (in
, out
);
636 MOVE_6_OBJECTS (in
, out
);
639 MOVE_9_OBJECTS (in
, out
);
640 MOVE_9_OBJECTS (in
, out
);
643 MOVE_16_OBJECTS (in
, out
);
644 MOVE_3_OBJECTS (in
, out
);
647 MOVE_16_OBJECTS (in
, out
);
648 MOVE_4_OBJECTS (in
, out
);
651 MOVE_16_OBJECTS (in
, out
);
652 MOVE_5_OBJECTS (in
, out
);
655 MOVE_16_OBJECTS (in
, out
);
656 MOVE_6_OBJECTS (in
, out
);
659 MOVE_16_OBJECTS (in
, out
);
660 MOVE_7_OBJECTS (in
, out
);
663 MOVE_16_OBJECTS (in
, out
);
664 MOVE_8_OBJECTS (in
, out
);
667 MOVE_16_OBJECTS (in
, out
);
668 MOVE_9_OBJECTS (in
, out
);
671 MOVE_16_OBJECTS (in
, out
);
672 MOVE_10_OBJECTS (in
, out
);
675 MOVE_16_OBJECTS (in
, out
);
676 MOVE_11_OBJECTS (in
, out
);
679 MOVE_16_OBJECTS (in
, out
);
680 MOVE_8_OBJECTS (in
, out
);
681 MOVE_4_OBJECTS (in
, out
);
684 MOVE_16_OBJECTS (in
, out
);
685 MOVE_9_OBJECTS (in
, out
);
686 MOVE_4_OBJECTS (in
, out
);
689 MOVE_16_OBJECTS (in
, out
);
690 MOVE_12_OBJECTS (in
, out
);
691 MOVE_2_OBJECTS (in
, out
);
694 MOVE_16_OBJECTS (in
, out
);
695 MOVE_11_OBJECTS (in
, out
);
696 MOVE_4_OBJECTS (in
, out
);
700 /* This loop governs the asymptotic behaviour of this algorithm, for long
703 for (loop_var
= 0; loop_var
< count
; loop_var
++)
704 MOVE_32_OBJECTS (in
, out
);
709 __inhibit_loop_to_libcall
710 __byte_memcpy (void *__restrict s1
, const void *__restrict s2
, size_t n
)
719 /* This code currently gives a stall for any value with a 1->2 in the low 5
720 bits, i.e. 1,2, 33,34 ? not acceptable! */
721 switch (value
& 0x1f)
726 MOVE_1_OBJECT (in
, out
);
729 MOVE_2_OBJECTS (in
, out
);
732 MOVE_3_OBJECTS (in
, out
);
735 MOVE_4_OBJECTS (in
, out
);
738 MOVE_5_OBJECTS (in
, out
);
741 MOVE_6_OBJECTS (in
, out
);
744 MOVE_7_OBJECTS (in
, out
);
747 MOVE_8_OBJECTS (in
, out
);
750 MOVE_9_OBJECTS (in
, out
);
753 MOVE_10_OBJECTS (in
, out
);
756 MOVE_11_OBJECTS (in
, out
);
759 MOVE_12_OBJECTS (in
, out
);
762 MOVE_9_OBJECTS (in
, out
);
763 MOVE_4_OBJECTS (in
, out
);
766 MOVE_12_OBJECTS (in
, out
);
767 MOVE_2_OBJECTS (in
, out
);
770 MOVE_11_OBJECTS (in
, out
);
771 MOVE_4_OBJECTS (in
, out
);
774 MOVE_16_OBJECTS (in
, out
);
777 MOVE_11_OBJECTS (in
, out
);
778 MOVE_6_OBJECTS (in
, out
);
781 MOVE_9_OBJECTS (in
, out
);
782 MOVE_9_OBJECTS (in
, out
);
785 MOVE_16_OBJECTS (in
, out
);
786 MOVE_3_OBJECTS (in
, out
);
789 MOVE_16_OBJECTS (in
, out
);
790 MOVE_4_OBJECTS (in
, out
);
793 MOVE_16_OBJECTS (in
, out
);
794 MOVE_5_OBJECTS (in
, out
);
797 MOVE_16_OBJECTS (in
, out
);
798 MOVE_6_OBJECTS (in
, out
);
801 MOVE_16_OBJECTS (in
, out
);
802 MOVE_7_OBJECTS (in
, out
);
805 MOVE_16_OBJECTS (in
, out
);
806 MOVE_8_OBJECTS (in
, out
);
809 MOVE_16_OBJECTS (in
, out
);
810 MOVE_9_OBJECTS (in
, out
);
813 MOVE_16_OBJECTS (in
, out
);
814 MOVE_10_OBJECTS (in
, out
);
817 MOVE_16_OBJECTS (in
, out
);
818 MOVE_11_OBJECTS (in
, out
);
821 MOVE_16_OBJECTS (in
, out
);
822 MOVE_8_OBJECTS (in
, out
);
823 MOVE_4_OBJECTS (in
, out
);
826 MOVE_16_OBJECTS (in
, out
);
827 MOVE_9_OBJECTS (in
, out
);
828 MOVE_4_OBJECTS (in
, out
);
831 MOVE_16_OBJECTS (in
, out
);
832 MOVE_12_OBJECTS (in
, out
);
833 MOVE_2_OBJECTS (in
, out
);
836 MOVE_16_OBJECTS (in
, out
);
837 MOVE_11_OBJECTS (in
, out
);
838 MOVE_4_OBJECTS (in
, out
);
842 /* This loop governs the asymptotic behaviour of this algorithm, for long
845 for (loop_var
= 0; loop_var
< count
; loop_var
++)
846 MOVE_32_OBJECTS (in
, out
);
850 /* Exposed interface. */
853 __inhibit_loop_to_libcall
854 memcpy (void *__restrict s1
, const void *__restrict s2
, size_t n
)
858 /* None of the following handles copying zero bytes. */
861 unsigned test
= (unsigned) s2
| (unsigned) s1
| (unsigned) n
;
864 __byte_memcpy (s1
, s2
, n
);
866 __shrt_int_memcpy (s1
, s2
, n
>> 1);
868 #ifdef __VISIUM_ARCH_BMI__
869 __asm__
__volatile__ ("bmd %0,%1,%2"
870 : "+t" (s1
), "+u" (s2
), "+v" (n
)
872 : "r4", "r5", "r6", "memory");
874 __int_memcpy (s1
, s2
, n
>> 2);
875 #endif /* __VISIUM_ARCH_BMI__ */