4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
32 * Copy string s2 to s1. s1 must be large enough. Return s1.
34 * Fast assembler language version of the following C-program strcpy
35 * which represents the `standard' for the C-library.
40 * register const char *s2;
44 * while(*s1++ = *s2++)
51 #include <sys/asm_linkage.h>
! This is a 32-bit implementation of strcpy. It works by first
! checking the alignment of its source pointer and, if the source
! is not aligned, copying bytes until it is. Once the source is
! aligned, the copy takes place while checking for zero bytes,
! using a method chosen by the destination alignment. Methods
! exist to handle per-byte, half-word, and word-sized copies.
65 sub %o1
, %o0
, %o3
! src
- dst
66 andcc
%o1
, 3, %o4
! src word aligned ?
68 mov
%o0
, %o2
! save
dst
70 cmp %o4
, 2 ! src halfword aligned
72 ldub
[%o2
+ %o3
], %o1
! src
[0]
74 stb %o1
, [%o2
] ! store first byte
76 cmp %o4
, 3 ! only one byte needed to align?
78 inc
%o2
! src+
+, dst+
+
81 lduh
[%o2
+ %o3
], %o1
! src
[]
82 srl
%o1
, 8, %o4
! %o4
<7:0> = first byte
83 tst
%o4
! first byte zero ?
85 stb %o4
, [%o2
] ! store first byte
86 andcc
%o1
, 0xff, %g0
! second byte zero ?
88 stb %o1
, [%o2
+ 1] ! store second byte
89 add %o2
, 2, %o2
! src
+= 2, dst += 2
92 sethi
%hi
(0x01010101), %o4
! Alan Mycroft
's magic1
93 sethi %hi(0x80808080), %o5 ! Alan Mycroft's magic2
94 or %o4
, %lo
(0x01010101), %o4
95 andcc
%o2
, 3, %o1
! destination word aligned?
96 bnz
.dstnotaligned ! nope
97 or %o5
, %lo
(0x80808080), %o5
100 lduw
[%o2
+ %o3
], %o1
! src word
101 add %o2
, 4, %o2
! src
+= 4, dst += 4
102 andn
%o5
, %o1
, %g1
! ~word
& 0x80808080
103 sub %o1
, %o4
, %o1
! word
- 0x01010101
104 andcc
%o1
, %g1
, %g0
! ((word
- 0x01010101) & ~word
& 0x80808080)
105 add %o1
, %o4
, %o1
! restore word
106 bz
,a .copyword ! no zero byte if magic expression == 0
107 st %o1
, [%o2
- 4] ! store word to
dst (address pre-incremented
)
110 set
0xff000000, %o4
! mask for
1st byte
111 srl
%o1
, 24, %o3
! %o3
<7:0> = first byte
112 andcc
%o1
, %o4
, %g0
! first byte zero?
114 stb %o3
, [%o2
- 4] ! store first byte
115 set
0x00ff0000, %o5
! mask for
2nd byte
116 srl
%o1
, 16, %o3
! %o3
<7:0> = second byte
117 andcc
%o1
, %o5
, %g0
! second byte zero?
119 stb %o3
, [%o2
- 3] ! store second byte
120 srl
%o4
, 16, %o4
! 0x0000ff00 = mask for
3rd byte
121 andcc
%o1
, %o4
, %g0
! third byte zero?
122 srl
%o1
, 8, %o3
! %o3
<7:0> = third byte
124 stb %o3
, [%o2
- 2] ! store third byte
125 stb %o1
, [%o2
- 1] ! store fourth byte
128 retl
! done with leaf function
132 cmp %o1
, 2 ! dst half word aligned?
133 be,a .storehalfword2 ! yup, store half word at a time
134 lduw
[%o2
+ %o3
], %o1
! src word
137 lduw
[%o2
+ %o3
], %o1
! src word
138 add %o2
, 4, %o2
! src
+= 4, dst += 4
139 sub %o1
, %o4
, %g1
! x
- 0x01010101
140 andn
%g1
, %o1
, %g1
! (x
- 0x01010101) & ~x
141 andcc
%g1
, %o5
, %g0
! ((x
- 0x01010101) & ~x
& 0x80808080)
142 bnz
.zerobyte ! word has zero byte, handle end cases
143 srl
%o1
, 24, %g1
! %g1
<7:0> = first byte
144 stb %g1
, [%o2
- 4] ! store first byte; half-word aligned now
145 srl
%o1
, 8, %g1
! %g1
<15:0> = byte
2, 3
146 sth %g1
, [%o2
- 3] ! store bytes
2, 3
147 ba .storebyte ! next word
148 stb %o1
, [%o2
- 1] ! store fourth byte
151 lduw
[%o2
+ %o3
], %o1
! src word
153 add %o2
, 4, %o2
! src
+= 4, dst += 4
154 sub %o1
, %o4
, %g1
! x
- 0x01010101
155 andn
%g1
, %o1
, %g1
! (x
- 0x01010101) & ~x
156 andcc
%g1
, %o5
, %g0
! ((x
- 0x01010101) & ~x
& 0x80808080)
157 bnz
.zerobyte ! word has zero byte, handle end cases
158 srl
%o1
, 16, %g1
! get first
and second byte
159 sth %g1
, [%o2
- 4] ! store first
and second byte
160 ba .storehalfword ! next word
161 sth %o1
, [%o2
- 2] ! store third
and fourth byte
! DO NOT remove these NOPs. Removing them slows down the halfword
! loop by 15%.