/* Copy SRC to DEST returning the address of the terminating '\0' in DEST.
   Copyright (C) 1998, 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and
		  Jakub Jelinek <jj@ultra.linux.cz>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
/* Tell the assembler that this file uses the global registers %g2, %g3
   and %g6 as scratch registers.  */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
	/* Normally, this uses the
	   ((xword - 0x0101010101010101) & 0x8080808080808080) test
	   to find out whether any byte in xword could be zero.  This is
	   fast, but it also gives a false alarm for any byte in the range
	   0x81-0xff.  That does not matter for correctness, because whenever
	   the test says there could be a zero byte we check the word byte
	   by byte; but if bytes with the high bit set are common in the
	   strings, performance will be poor.  You can #define
	   EIGHTBIT_NOT_RARE and the algorithm will use a more precise test
	   that is one tick slower,
	   ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080),
	   which does not give any false alarms (but if some bits are set,
	   one cannot assume from it which bytes are zero and which are not).
	   It is yet to be measured what the correct default for glibc is
	   these days for an average user.  */
#include <sysdep.h>			/* ENTRY, END */
#include <asm/asi.h>			/* ASI_PNF */

/* char *__stpcpy (char *dest, const char *src)

   Copy the NUL-terminated string at SRC to DEST, terminator included,
   and return the address of the NUL byte written to DEST.

   In:    %o0 = dest, %o1 = src
   Out:   %o0 = address of the terminating NUL in dest
   Uses:  %o2-%o5, %g1-%g6 and the integer condition codes.
	  Leaf routine: returns with retl and touches no stack.

   Strategy
     - dest not 8-byte aligned: copy single bytes (12:/13:) until it is.
     - dest and src both aligned: copy one doubleword per iteration
       (1:/2:/3:).
     - dest aligned, src not: load aligned doublewords and merge each
       neighbouring pair with shifts (14:/15:).

   A doubleword is first screened with (x - 0x01..01) & 0x80..80
   (additionally & ~x when EIGHTBIT_NOT_RARE is defined).  If the screen
   is clean the doubleword is stored whole; otherwise its bytes are
   examined one by one, starting with the most significant byte, which
   is the byte at the lowest address.

   Look-ahead loads use ASI_PNF (the no-fault address space) because
   they may read beyond the end of the source string.

   The instruction order was hand-scheduled and many instructions sit
   in branch delay slots (marked by one extra leading space).  Do not
   reorder anything without re-checking every delay slot.  */

	.text
	.align		32
ENTRY(__stpcpy)
	sethi		%hi(0x01010101), %g1
	or		%g1, %lo(0x01010101), %g1	/* %g1 = 0x01010101	*/
	andcc		%o0, 7, %g0			/* dest aligned?	*/
	sllx		%g1, 32, %g2

	bne,pn		%icc, 12f			/* no: byte loop	*/
	 andcc		%o1, 7, %g3			/* %g3 = src & 7; sets
							   cc for the bne below	*/
	or		%g1, %g2, %g1			/* 0x0101010101010101	*/
	bne,pn		%icc, 14f			/* src misaligned	*/

	 sllx		%g1, 7, %g2			/* 0x8080808080808080	*/

	/* Both pointers are 8-byte aligned.
	   %g3 = doubleword being tested/stored,
	   %o3 = next doubleword, loaded one iteration ahead.  */
1:	ldx		[%o1], %o3
	add		%o1, 8, %o1
2:	mov		%o3, %g3

	sub		%o3, %g1, %o2			/* start of zero screen	*/
3:	ldxa		[%o1] ASI_PNF, %o3		/* look-ahead load	*/
#ifdef EIGHTBIT_NOT_RARE
	andn		%o2, %g3, %o2			/* precise: & ~word	*/
#endif
	add		%o0, 8, %o0
	andcc		%o2, %g2, %g0			/* any candidate byte?	*/

	add		%o1, 8, %o1
	be,a,pt		%xcc, 2b			/* none: store and loop	*/
	 stx		%g3, [%o0 - 8]			/* annulled if not taken */

	/* A byte of %g3 may be zero.  Test bytes 0..7 in memory order;
	   each delay slot pre-shifts the next byte into %g4 or %g5.  */
	srlx		%g3, 56, %g5

	andcc		%g5, 0xff, %g0
	be,pn		%icc, 11f			/* byte 0 is NUL	*/
	 srlx		%g3, 48, %g4
	andcc		%g4, 0xff, %g0

	be,pn		%icc, 10f			/* byte 1 is NUL	*/
	 srlx		%g3, 40, %g5
	andcc		%g5, 0xff, %g0
	be,pn		%icc, 9f			/* byte 2 is NUL	*/

	 srlx		%g3, 32, %g4
	andcc		%g4, 0xff, %g0
	be,pn		%icc, 8f			/* byte 3 is NUL	*/
	 srlx		%g3, 24, %g5

	andcc		%g5, 0xff, %g0
	be,pn		%icc, 7f			/* byte 4 is NUL	*/
	 srlx		%g3, 16, %g4
	andcc		%g4, 0xff, %g0

	be,pn		%icc, 6f			/* byte 5 is NUL	*/
	 srlx		%g3, 8, %g5
	andcc		%g5, 0xff, %g0
	be,pn		%icc, 5f			/* byte 6 is NUL	*/

	 sub		%o3, %g1, %o2			/* screen for next word	*/
	stx		%g3, [%o0 - 8]			/* bytes 0..6 non-zero:
							   store whole word	*/
	andcc		%g3, 0xff, %g0
	bne,pt		%icc, 3b			/* false alarm: resume	*/

	 mov		%o3, %g3
	/* Byte 7 was the NUL; also the exit of the byte loop at 13:.
	   %o0 is one past the NUL that has already been stored.  */
4:	retl
	 sub		%o0, 1, %o0

	/* Tails for the aligned loop.  %o0 is 8 past the slot of the
	   doubleword in %g3, so byte k belongs at [%o0 - 8 + k].  Each
	   tail stores bytes 0..k (k = index of the NUL) using the widest
	   stores possible and returns %o0 - 8 + k.  */
	.align		16
6:	ba,pt		%xcc, 23f			/* k = 5		*/
	 sub		%o0, 3, %g6
5:	sub		%o0, 2, %g6			/* k = 6		*/
	stb		%g5, [%o0 - 2]			/* byte 6		*/

	srlx		%g3, 16, %g4
23:	sth		%g4, [%o0 - 4]			/* bytes 4-5		*/
	srlx		%g3, 32, %g4
	stw		%g4, [%o0 - 8]			/* bytes 0-3		*/

	retl
	 mov		%g6, %o0
8:	ba,pt		%xcc, 24f			/* k = 3		*/
	 sub		%o0, 5, %g6

7:	sub		%o0, 4, %g6			/* k = 4		*/
	stb		%g5, [%o0 - 4]			/* byte 4		*/
	srlx		%g3, 32, %g4
24:	stw		%g4, [%o0 - 8]			/* bytes 0-3		*/

	retl
	 mov		%g6, %o0
10:	ba,pt		%xcc, 25f			/* k = 1		*/
	 sub		%o0, 7, %g6

9:	sub		%o0, 6, %g6			/* k = 2		*/
	stb		%g5, [%o0 - 6]			/* byte 2		*/
	srlx		%g3, 48, %g4
25:	sth		%g4, [%o0 - 8]			/* bytes 0-1		*/

	retl
	 mov		%g6, %o0
11:	stb		%g5, [%o0 - 8]			/* k = 0: just the NUL	*/
	retl

	 sub		%o0, 8, %o0

	/* dest is not 8-byte aligned: copy byte by byte until it is.
	   The setup of %g1/%g2 is finished here because the branch to
	   12: was taken before the entry code completed it.  */
	.align		16
12:	or		%g1, %g2, %g1			/* 0x0101010101010101	*/
	ldub		[%o1], %o3
	sllx		%g1, 7, %g2			/* 0x8080808080808080	*/
	stb		%o3, [%o0]

13:	add		%o0, 1, %o0
	add		%o1, 1, %o1
	andcc		%o3, 0xff, %g0			/* was that the NUL?	*/
	be,pn		%icc, 4b			/* yes: return %o0 - 1	*/

	 lduba		[%o1] ASI_PNF, %o3		/* next byte; runs even
							   when the branch is
							   taken		*/
	andcc		%o0, 7, %g0			/* dest aligned yet?	*/
	bne,a,pt	%icc, 13b
	 stb		%o3, [%o0]			/* annulled if not taken */

	andcc		%o1, 7, %g3			/* %g3 = src & 7	*/
	be,a,pt		%icc, 1b			/* src aligned too	*/
	 ldx		[%o1], %o3			/* annulled if not taken */

	/* dest aligned, src not.  Round src down to a doubleword boundary
	   and build every output doubleword from two neighbouring input
	   doublewords.  */
14:	orcc		%g0, 64, %g4

	sllx		%g3, 3, %g5
	sub		%o1, %g3, %o1
	sub		%g4, %g5, %g4
	/* %g1 = 0101010101010101
	   %g2 = 8080808080808080
	   %g3 = source alignment
	   %g5 = number of bits to shift left
	   %g4 = number of bits to shift right  */
	ldxa		[%o1] ASI_PNF, %o5

	addcc		%o1, 8, %o1
15:	sllx		%o5, %g5, %o3			/* leading part		*/
	ldxa		[%o1] ASI_PNF, %o5		/* next input word	*/
	srlx		%o5, %g4, %o4			/* trailing part	*/

	add		%o0, 8, %o0
	or		%o3, %o4, %o3			/* %o3 = merged word	*/
	add		%o1, 8, %o1
	sub		%o3, %g1, %o4			/* start of zero screen	*/

#ifdef EIGHTBIT_NOT_RARE
	andn		%o4, %o3, %o4			/* precise: & ~word	*/
#endif
	andcc		%o4, %g2, %g0			/* any candidate byte?	*/
	be,a,pt		%xcc, 15b			/* none: store and loop	*/
	 stx		%o3, [%o0 - 8]			/* annulled if not taken */

	/* A byte of %o3 may be zero.  Test bytes 0..7 in memory order.  */
	srlx		%o3, 56, %o4

	andcc		%o4, 0xff, %g0
	be,pn		%icc, 22f			/* byte 0 is NUL	*/
	 srlx		%o3, 48, %o4
	andcc		%o4, 0xff, %g0

	be,pn		%icc, 21f			/* byte 1 is NUL	*/
	 srlx		%o3, 40, %o4
	andcc		%o4, 0xff, %g0
	be,pn		%icc, 20f			/* byte 2 is NUL	*/

	 srlx		%o3, 32, %o4
	andcc		%o4, 0xff, %g0
	be,pn		%icc, 19f			/* byte 3 is NUL	*/
	 srlx		%o3, 24, %o4

	andcc		%o4, 0xff, %g0
	be,pn		%icc, 18f			/* byte 4 is NUL	*/
	 srlx		%o3, 16, %o4
	andcc		%o4, 0xff, %g0

	be,pn		%icc, 17f			/* byte 5 is NUL	*/
	 srlx		%o3, 8, %o4
	andcc		%o4, 0xff, %g0
	be,pn		%icc, 16f			/* byte 6 is NUL	*/

	 andcc		%o3, 0xff, %g0			/* test byte 7		*/
	bne,pn		%icc, 15b			/* false alarm: resume	*/
	 stx		%o3, [%o0 - 8]			/* not annulled: the word
							   is stored either way	*/
	retl						/* byte 7 is the NUL	*/

	 sub		%o0, 1, %o0

	/* Tails for the merge loop.  %g6 = address of the NUL (the return
	   value).  Control falls through the labels so that bytes k..4
	   are stored singly and bytes 0-3 as one word.  */
	.align		16
17:	ba,pt		%xcc, 26f			/* k = 5		*/
	 subcc		%o0, 3, %g6
18:	ba,pt		%xcc, 27f			/* k = 4		*/
	 subcc		%o0, 4, %g6

19:	ba,pt		%xcc, 28f			/* k = 3		*/
	 subcc		%o0, 5, %g6
16:	subcc		%o0, 2, %g6			/* k = 6		*/
	srlx		%o3, 8, %o4

	stb		%o4, [%o0 - 2]			/* byte 6		*/
26:	srlx		%o3, 16, %o4
	stb		%o4, [%o0 - 3]			/* byte 5		*/
27:	srlx		%o3, 24, %o4

	stb		%o4, [%o0 - 4]			/* byte 4		*/
28:	srlx		%o3, 32, %o4
	stw		%o4, [%o0 - 8]			/* bytes 0-3		*/
	retl

	 mov		%g6, %o0

	/* NUL within the first three bytes: store bytes k..0 singly.  */
	.align		16
21:	ba,pt		%xcc, 29f			/* k = 1		*/
	 subcc		%o0, 7, %g6
22:	ba,pt		%xcc, 30f			/* k = 0		*/
	 subcc		%o0, 8, %g6

20:	subcc		%o0, 6, %g6			/* k = 2		*/
	srlx		%o3, 40, %o4
	stb		%o4, [%o0 - 6]			/* byte 2		*/
29:	srlx		%o3, 48, %o4

	stb		%o4, [%o0 - 7]			/* byte 1		*/
30:	srlx		%o3, 56, %o4
	stb		%o4, [%o0 - 8]			/* byte 0		*/
	retl

	 mov		%g6, %o0
END(__stpcpy)
/* Publish the routine under its public name: stpcpy is a weak alias of
   __stpcpy.  The libc_hidden_* lines are the glibc macros that provide
   the definitions used for references from inside the library.  */
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)