8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / lib / libc / sparc / gen / strncpy.s
blobc919be9bcedcece2035ffd98f8e8b1ac41e18c1e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 .file "strncpy.s"
30 * strncpy(s1, s2, n)
32 * Copy string s2 to s1, truncating or null-padding so that exactly n
33 * bytes are always written to s1; returns s1.
35 * Fast assembler language version of the following C-program for strncpy
36 * which represents the `standard' for the C-library.
38 * char *
39 * strncpy(char *s1, const char *s2, size_t n)
40 * {
41 * char *os1 = s1;
43 * n++;
44 * while ((--n != 0) && ((*s1++ = *s2++) != '\0'))
45 * ;
46 * if (n != 0)
47 * while (--n != 0)
48 * *s1++ = '\0';
49 * return (os1);
50 * }
53 #include <sys/asm_linkage.h>
55 ! strncpy works similarly to strcpy, except that n bytes of s2
56 ! are copied to s1. If a null character is reached in s2 yet more
57 ! bytes remain to be copied, strncpy will copy null bytes into
58 ! the destination string.
60 ! This implementation works by first aligning the src ptr and
61 ! performing small copies until it is aligned. Then, the string
62 ! is copied based upon destination alignment. (byte, half-word,
63 ! word, etc.)
! Register usage, as established by the code below.
!
! Leaf portion (before the save; used by .shortcpy / .padbyte):
!   %o0  s1; never modified, so it is the return value at .doneshort
!   %o1  s2 on entry
!   %o2  n on entry, then s1 + n (one past the last dst byte)
!   %o3  s2 + n (one past the last src byte that may be copied)
!   %o4  -n; a negative index that counts up towards zero, so that
!        [%o3 + %o4] and [%o2 + %o4] address the current src / dst byte
!   %o5  s2 & 3 for the alignment test, then the byte being copied
!
! Windowed portion (after the save the %o values above appear in %i):
!   %i0  s1; handed back to the caller by the restore at .done
!   %i1  byte or word most recently loaded from src
!   %i2  s1 + n; biased by -4 from .wordaligned on, because the word
!        loops bump %i4 before they store (.lastword undoes the bias)
!   %i3  s2 + n
!   %i4  negative count of bytes still to be written; 0 means done
!   %i5  (s2 & 3) - 4 while aligning src, afterwards 0x80808080 (magic2)
!   %l0  scratch, and the single dst pointer used by the padding code
!   %l1  0x01010101 (magic1) in the copy loops, a counter while padding
!   %g1  scratch
!
! Every branch has a delay slot; comments below point out where the
! delay-slot instruction matters to the logic.
65 ENTRY(strncpy)
67 .align 32
68 subcc %g0, %o2, %o4 ! %o4 = -n (negative index); Z set if n == 0
69 bz .doneshort ! if n == 0, done
70 cmp %o2, 7 ! n < 7 ? (delay slot; result is used by blu below)
71 add %o1, %o2, %o3 ! %o3 = src + n
72 blu .shortcpy ! n < 7, use byte-wise copy
73 add %o0, %o2, %o2 ! %o2 = dst + n (delay slot, done on both paths)
74 andcc %o1, 3, %o5 ! src word aligned ?
75 bz .wordaligned ! yup
76 save %sp, -0x40, %sp ! create new register window (delay slot, done on both paths)
77 sub %i5, 4, %i5 ! %i5 = (src & 3) - 4 = -(bytes until src aligned)
78 nop ! align loop on 16-byte boundary
79 nop ! align loop on 16-byte boundary
! copy single bytes until src is word aligned, n expires, or the
! terminating null byte of src has been copied
81 .alignsrc:
82 ldub [%i3 + %i4], %i1 ! src[]
83 stb %i1, [%i2 + %i4] ! dst[] = src[]
84 inccc %i4 ! src++, dst++, n--
85 bz .done ! n == 0, done
86 tst %i1 ! end of src reached (null byte) ? (delay slot; tested by bz,a below)
87 bz,a .bytepad ! yes, at least one byte to pad here
88 add %i2, %i4, %l0 ! need single dest pointer for fill (annulled: runs only if branch taken)
89 inccc %i5 ! src aligned now?
90 bnz .alignsrc ! no, copy another byte
91 .empty ! delay slot is the first instruction of .wordaligned
! src is word aligned from here on; pick a copy loop by dst alignment
93 .wordaligned:
94 add %i2, %i4, %l0 ! %l0 = current dst pointer
95 sethi %hi(0x01010101), %l1 ! Alan Mycroft's magic1
96 sub %i2, 4, %i2 ! adjust for dest pre-incr in cpy loops
97 or %l1, %lo(0x01010101),%l1! finish loading magic1
98 andcc %l0, 3, %g1 ! destination word aligned ? (%g1 = dst & 3)
99 bnz .dstnotaligned ! nope
100 sll %l1, 7, %i5 ! create Alan Mycroft's magic2 (0x80808080); delay slot
! src and dst both word aligned: copy a word per iteration
102 .storeword:
103 lduw [%i3 + %i4], %i1 ! next src word
104 addcc %i4, 4, %i4 ! n -= 4, src += 4, dst += 4
105 bcs .lastword ! counter wraps when 4 or fewer bytes remained: last word
106 andn %i5, %i1, %g1 ! ~word & 0x80808080
107 sub %i1, %l1, %l0 ! word - 0x01010101
108 andcc %l0, %g1, %g0 ! ((word - 0x01010101) & ~word & 0x80808080)
109 bz,a .storeword ! no zero byte if magic expression == 0
110 stw %i1, [%i2 + %i4] ! store word to dst (address pre-incremented; annulled: runs only if branch taken)
112 ! n has not expired, but src is at the end. we need to push out the
113 ! remaining src bytes and then start padding with null bytes
! %i1 holds the word containing the null byte. Once a zero byte has
! been seen, %i1 is cleared so all following bytes are stored as zero.
115 .zerobyte:
116 add %i2, %i4, %l0 ! pointer to dest string
117 srl %i1, 24, %g1 ! first byte
118 stb %g1, [%l0] ! store it
119 sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
120 sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
121 andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes
122 srl %i1, 16, %g1 ! second byte
123 stb %g1, [%l0 + 1] ! store it
124 and %g1, 0xff, %g1 ! isolate byte
125 sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
126 sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
127 andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes
128 srl %i1, 8, %g1 ! third byte
129 stb %g1, [%l0 + 2] ! store it
130 and %g1, 0xff, %g1 ! isolate byte
131 sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
132 sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
133 andn %i1, %g1, %i1 ! if byte == 0, start padding with null bytes
134 stb %i1, [%l0 + 3] ! store fourth byte
135 addcc %i4, 8, %g0 ! carry set if 8 or fewer pad bytes remain
136 bcs .bytepad ! yes, do simple byte wise fill
137 add %l0, 4, %l0 ! dst += 4 (delay slot, done on both paths)
138 andcc %l0, 3, %l1 ! dst offset relative to word boundary
139 bz .fillaligned ! dst already word aligned
141 ! here there is at least one more byte to zero out: otherwise we would
142 ! have exited through label .lastword
144 sub %l1, 4, %l1 ! %l1 = -(bytes to align dst to word boundary); delay slot of bz above
145 .makealigned:
146 stb %g0, [%l0] ! dst[] = 0
147 addcc %i4, 1, %i4 ! n--
148 bz .done ! n == 0, we are done
149 addcc %l1, 1, %l1 ! any more byte needed to align (delay slot; tested by bnz below)
150 bnz .makealigned ! yup, pad another byte
151 add %l0, 1, %l0 ! dst++ (delay slot, done on both paths)
152 nop ! pad to align copy loop below
154 ! here we know that there at least another 4 bytes to pad, since
155 ! we don't get here unless there were >= 8 bytes to pad to begin
156 ! with, and we have padded at most 3 bytes during dst aligning
! The fill loop below writes two words per iteration, so an odd word
! count is first made even by consuming one word here. If the count is
! already even, %i2 is 0 and the first word is simply written again by
! .fillword.
158 .fillaligned:
159 add %i4, 3, %i2 ! -(pad bytes) + 3, prepare to round to whole words
160 and %i2, -4, %l1 ! %l1 = -(pad bytes rounded down to whole words)
161 and %i2, 4, %i2 ! word count odd ? 4 : 0
162 stw %g0, [%l0] ! store first word
163 addcc %l1, %i2, %l1 ! consume the odd word; zero if it was the only word
164 add %i4, %i2, %i4 ! if word count odd, n -= 4
165 bz .bytepad ! if word count == 1, pad bytes left
166 add %l0, %i2, %l0 ! bump dst if word count odd (delay slot, done on both paths)
168 .fillword:
169 addcc %l1, 8, %l1 ! count -= 8; carry set when it reaches zero
170 stw %g0, [%l0] ! dst[n] = 0
171 stw %g0, [%l0 + 4] ! dst[n+4] = 0
172 add %l0, 8, %l0 ! dst += 8
173 bcc .fillword ! fill words until count == 0
174 addcc %i4, 8, %i4 ! n -= 8 (delay slot, runs every iteration; Z is tested by bz below)
175 bz .done ! if n == 0, we are done
176 .empty ! delay slot is the first instruction of .bytepad
! pad the remaining bytes, two per iteration; an odd count is made
! even first. %l0 is the dst pointer, %i4 the negative byte count.
178 .bytepad:
179 and %i4, 1, %i2 ! byte count odd ? 1 : 0
180 stb %g0, [%l0] ! store first byte
181 addcc %i4, %i2, %i4 ! byte count == 1 ?
182 bz .done ! yup, we are done
183 add %l0, %i2, %l0 ! bump pointer if odd (delay slot, done on both paths)
185 .fillbyte:
186 addcc %i4, 2, %i4 ! n -= 2
187 stb %g0, [%l0] ! dst[n] = 0
188 stb %g0, [%l0 + 1] ! dst[n+1] = 0
189 bnz .fillbyte ! fill until n == 0
190 add %l0, 2, %l0 ! dst += 2 (delay slot)
! common exit for every path that executed the save
192 .done:
193 ret ! done
194 restore %i0, %g0, %o0 ! restore reg window, return dst
196 ! this is the last word. It may contain null bytes. store bytes
197 ! until n == 0. if null byte encountered, continue with null bytes
199 .lastword:
200 sub %i4, 4, %i4 ! undo counter pre-increment
201 add %i2, 4, %i2 ! undo the -4 bias so [%i2 + %i4] is the current dst byte
203 srl %i1, 24, %g1 ! first byte
204 stb %g1, [%i2 + %i4] ! store it
205 inccc %i4 ! n--
206 bz .done ! if n == 0, we're done
207 sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
208 sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
209 andn %i1, %g1, %i1 ! if byte == 0, start padding with null
210 srl %i1, 16, %g1 ! second byte
211 stb %g1, [%i2 + %i4] ! store it
212 inccc %i4 ! n--
213 bz .done ! if n == 0, we're done
214 and %g1, 0xff, %g1 ! isolate byte
215 sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
216 sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
217 andn %i1, %g1, %i1 ! if byte == 0, start padding with null
218 srl %i1, 8, %g1 ! third byte
219 stb %g1, [%i2 + %i4] ! store it
220 inccc %i4 ! n--
221 bz .done ! if n == 0, we're done
222 and %g1, 0xff, %g1 ! isolate byte
223 sub %g1, 1, %g1 ! byte == 0 ? -1 : byte - 1
224 sra %g1, 31, %g1 ! byte == 0 ? -1 : 0
225 andn %i1, %g1, %i1 ! if byte == 0, start padding with null
226 ba .done ! here n must be zero, we are done
227 stb %i1, [%i2 + %i4] ! store fourth byte (delay slot)
! src is word aligned but dst is not; %g1 still holds dst & 3
229 .dstnotaligned:
230 cmp %g1, 2 ! dst half word aligned?
231 be .storehalfword2 ! yup, store half word at a time
232 .empty ! delay slot is the lduw at .storebyte, so the word is loaded on both paths
! dst is at an odd address: store each word as byte, half-word, byte
233 .storebyte:
234 lduw [%i3 + %i4], %i1 ! x = src[]
235 addcc %i4, 4, %i4 ! src += 4, dst += 4, n -= 4
236 bcs .lastword ! if counter wraps, last word
237 andn %i5, %i1, %g1 ! ~x & 0x80808080
238 sub %i1, %l1, %l0 ! x - 0x01010101
239 andcc %l0, %g1, %g0 ! ((x - 0x01010101) & ~x & 0x80808080)
240 bnz .zerobyte ! end of src found, may need to pad
241 add %i2, %i4, %l0 ! dst (in pointer form); delay slot
242 srl %i1, 24, %g1 ! %g1<7:0> = 1st byte; dst + 1 is half-word aligned
243 stb %g1, [%l0] ! store first byte
244 srl %i1, 8, %g1 ! %g1<15:0> = bytes 2, 3
245 sth %g1, [%l0 + 1] ! store bytes 2, 3
246 ba .storebyte ! next word
247 stb %i1, [%l0 + 3] ! store fourth byte (delay slot)
! dst is half-word aligned: store each word as two half-words.
! First entry is at .storehalfword2 with the word already loaded.
251 .storehalfword:
252 lduw [%i3 + %i4], %i1 ! x = src[]
253 .storehalfword2:
254 addcc %i4, 4, %i4 ! src += 4, dst += 4, n -= 4
255 bcs .lastword ! if counter wraps, last word
256 andn %i5, %i1, %g1 ! ~x & 0x80808080
257 sub %i1, %l1, %l0 ! x - 0x01010101
258 andcc %l0, %g1, %g0 ! ((x - 0x01010101) & ~x & 0x80808080)
259 bnz .zerobyte ! x has zero byte, handle end cases
260 add %i2, %i4, %l0 ! dst (in pointer form); delay slot
261 srl %i1, 16, %g1 ! %g1<15:0> = bytes 1, 2
262 sth %g1, [%l0] ! store bytes 1, 2
263 ba .storehalfword ! next word
264 sth %i1, [%l0 + 2] ! store bytes 3, 4 (delay slot)
! n < 7: plain byte loop in the caller's register window (no save)
266 .shortcpy:
267 ldub [%o3 + %o4], %o5 ! src[]
268 stb %o5, [%o2 + %o4] ! dst[] = src[]
269 inccc %o4 ! src++, dst++, n--
270 bz .doneshort ! if n == 0, done
271 tst %o5 ! src[] == 0 ? (delay slot; tested by bnz,a below)
272 bnz,a .shortcpy ! nope, next byte
273 nop ! empty delay slot
! null byte copied but n has not expired: zero-fill the rest of dst
275 .padbyte:
276 stb %g0, [%o2 + %o4] ! dst[] = 0
277 .padbyte2:
278 addcc %o4, 1, %o4 ! dst++, n--
279 bnz,a .padbyte2 ! if n != 0, next byte
280 stb %g0, [%o2 + %o4] ! dst[] = 0 (annulled: runs only if branch taken)
281 nop ! align label below to 16-byte boundary
! exit for the paths that never executed the save; %o0 is still s1
283 .doneshort:
284 retl ! return from leaf
285 nop ! empty delay slot
286 SET_SIZE(strncpy)