8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / lib / libc / sparc / gen / strcpy.s
blob1afc5c14cdfb90bbeb624f182b687a4e2cf07b4d
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 .file "strcpy.s"
30 * strcpy(s1, s2)
32 * Copy string s2 to s1. s1 must be large enough. Return s1.
34 * Fast assembler language version of the following C-program strcpy
35 * which represents the `standard' for the C-library.
37 * char *
38 * strcpy(s1, s2)
39 * register char *s1;
40 * register const char *s2;
41 * {
42 * char *os1 = s1;
44 * while(*s1++ = *s2++)
45 * ;
46 * return(os1);
47 * }
51 #include <sys/asm_linkage.h>
53 ! This is a 32-bit implementation of strcpy. It works by
54 ! first checking the alignment of its source pointer. If the
55 ! source is not word aligned, it copies a byte and/or a halfword until it is.
56 ! Once this has occurred, the source is read a word at a time, checking
57 ! each word for zero bytes, and stored according to destination alignment.
58 ! Methods exist to handle per-byte, half-word, and word sized
59 ! stores.
!
! strcpy(s1, s2): copy the NUL-terminated string s2 to s1 and return s1.
!
! Register usage, as established by the code below:
!   %o0  s1 (dst) on entry; never written, so it is also the return value
!   %o1  s2 (src) on entry; afterwards holds the most recently loaded src data
!   %o2  running dst pointer
!   %o3  src - dst, so [%o2 + %o3] addresses the src data that belongs at [%o2]
!        (reused as a byte scratch register in .zerobyte)
!   %o4  src alignment / byte scratch while aligning; 0x01010101 in the word
!        loops; byte mask in .zerobyte
!   %o5  0x80808080 in the word loops; byte mask in .zerobyte
!   %g1  scratch
!
! Zero-byte detection in the word loops: the expression
!   (word - 0x01010101) & ~word & 0x80808080
! is zero when the word has no zero byte and non-zero when it has one.
!
! Every branch is followed by a delay-slot instruction; those are marked
! "(delay slot)" below. With the ",a" (annul) suffix on a conditional
! branch, the delay slot is executed only when the branch is taken.
!
61 ENTRY(strcpy)
63 .align 32
65 sub %o1, %o0, %o3 ! %o3 = src - dst
66 andcc %o1, 3, %o4 ! %o4 = src & 3; src word aligned ?
67 bz .srcaligned ! yup
68 mov %o0, %o2 ! (delay slot) %o2 = working dst pointer; %o0 is kept as return value
!
! src is not word aligned: %o4 is 1, 2 or 3.
!
70 cmp %o4, 2 ! src halfword aligned
71 be .s2aligned ! yup
72 ldub [%o2 + %o3], %o1 ! (delay slot) %o1 = src[0]; unused if the branch is taken
73 tst %o1 ! byte zero?
74 stb %o1, [%o2] ! store first byte
75 bz .done ! yup, done
76 cmp %o4, 3 ! (delay slot) only one byte needed to align?
77 bz .srcaligned ! yup
78 inc %o2 ! (delay slot) src++, dst++ (src moves too, since src = %o2 + %o3)
!
! src is halfword aligned but not word aligned: copy two bytes.
!
80 .s2aligned:
81 lduh [%o2 + %o3], %o1 ! src[]
82 srl %o1, 8, %o4 ! %o4<7:0> = first byte
83 tst %o4 ! first byte zero ?
84 bz .done ! yup, done
85 stb %o4, [%o2] ! (delay slot) store first byte (the terminator, if zero)
86 andcc %o1, 0xff, %g0 ! second byte zero ?
87 bz .done ! yup, done
88 stb %o1, [%o2 + 1] ! (delay slot) store second byte (the terminator, if zero)
89 add %o2, 2, %o2 ! src += 2, dst += 2
!
! src is word aligned from here on: build the two magic constants and
! pick a store loop by dst alignment.
!
91 .srcaligned:
92 sethi %hi(0x01010101), %o4 ! Alan Mycroft's magic1
93 sethi %hi(0x80808080), %o5 ! Alan Mycroft's magic2
94 or %o4, %lo(0x01010101), %o4
95 andcc %o2, 3, %o1 ! %o1 = dst & 3; destination word aligned?
96 bnz .dstnotaligned ! nope
97 or %o5, %lo(0x80808080), %o5 ! (delay slot) finish magic2
!
! dst word aligned: load a word, store a word.
!
99 .copyword:
100 lduw [%o2 + %o3], %o1 ! src word
101 add %o2, 4, %o2 ! src += 4, dst += 4
102 andn %o5, %o1, %g1 ! ~word & 0x80808080
103 sub %o1, %o4, %o1 ! word - 0x01010101
104 andcc %o1, %g1, %g0 ! ((word - 0x01010101) & ~word & 0x80808080)
105 add %o1, %o4, %o1 ! restore word
106 bz,a .copyword ! no zero byte if magic expression == 0
107 st %o1, [%o2 - 4] ! (annulled delay slot) store word to dst (address pre-incremented)
!
! %o1 holds a word containing a zero byte and %o2 is already 4 past its
! dst position: store byte by byte, stopping after the zero byte.
! %o3, %o4 and %o5 are reused as scratch; their loop values are no
! longer needed.
!
109 .zerobyte:
110 set 0xff000000, %o4 ! mask for 1st byte
111 srl %o1, 24, %o3 ! %o3<7:0> = first byte
112 andcc %o1, %o4, %g0 ! first byte zero?
113 bz .done ! yup, done
114 stb %o3, [%o2 - 4] ! (delay slot) store first byte
115 set 0x00ff0000, %o5 ! mask for 2nd byte
116 srl %o1, 16, %o3 ! %o3<7:0> = second byte
117 andcc %o1, %o5, %g0 ! second byte zero?
118 bz .done ! yup, done
119 stb %o3, [%o2 - 3] ! (delay slot) store second byte
120 srl %o4, 16, %o4 ! 0x0000ff00 = mask for 3rd byte
121 andcc %o1, %o4, %g0 ! third byte zero?
122 srl %o1, 8, %o3 ! %o3<7:0> = third byte
123 bz .done ! yup, done
124 stb %o3, [%o2 - 2] ! (delay slot) store third byte
125 stb %o1, [%o2 - 1] ! store fourth byte; it is the zero byte since the first three were not
127 .done:
128 retl ! done with leaf function; %o0 still holds the original dst
129 .empty ! delay slot is the cmp at .dstnotaligned below, which only sets condition codes
!
! dst is not word aligned: %o1 = dst & 3 is 1, 2 or 3.
!
131 .dstnotaligned:
132 cmp %o1, 2 ! dst half word aligned?
133 be,a .storehalfword2 ! yup, store half word at a time
134 lduw [%o2 + %o3], %o1 ! (annulled delay slot) src word, replaces the load at .storehalfword
!
! dst is odd: store each word as byte, halfword, byte.
!
136 .storebyte:
137 lduw [%o2 + %o3], %o1 ! src word
138 add %o2, 4, %o2 ! src += 4, dst += 4
139 sub %o1, %o4, %g1 ! x - 0x01010101
140 andn %g1, %o1, %g1 ! (x - 0x01010101) & ~x
141 andcc %g1, %o5, %g0 ! ((x - 0x01010101) & ~x & 0x80808080)
142 bnz .zerobyte ! word has zero byte, handle end cases
143 srl %o1, 24, %g1 ! (delay slot) %g1<7:0> = first byte
144 stb %g1, [%o2 - 4] ! store first byte; next dst address is even, i.e. half-word aligned
145 srl %o1, 8, %g1 ! %g1<15:0> = byte 2, 3
146 sth %g1, [%o2 - 3] ! store bytes 2, 3
147 ba .storebyte ! next word
148 stb %o1, [%o2 - 1] ! (delay slot) store fourth byte
!
! dst is halfword aligned but not word aligned: store each word as two
! halfwords. First entry is at .storehalfword2 with the word already loaded.
!
150 .storehalfword:
151 lduw [%o2 + %o3], %o1 ! src word
152 .storehalfword2:
153 add %o2, 4, %o2 ! src += 4, dst += 4
154 sub %o1, %o4, %g1 ! x - 0x01010101
155 andn %g1, %o1, %g1 ! (x - 0x01010101) & ~x
156 andcc %g1, %o5, %g0 ! ((x - 0x01010101) & ~x & 0x80808080)
157 bnz .zerobyte ! word has zero byte, handle end cases
158 srl %o1, 16, %g1 ! (delay slot) get first and second byte
159 sth %g1, [%o2 - 4] ! store first and second byte
160 ba .storehalfword ! next word
161 sth %o1, [%o2 - 2] ! (delay slot) store third and fourth byte
163 ! DO NOT remove these NOPs. It will slow down the halfword loop by 15%
165 nop ! padding
166 nop ! padding
168 SET_SIZE(strcpy)