import less(1)
[unleashed/tickless.git] / usr / src / lib / libc / amd64 / gen / strncat.s
blob02460d6beeb0f1598e2c9d2f40b628a29d9600e1
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 .file "strncat.s"
29 / strncat(s1, s2, n)
31 / Concatenates s2 on the end of s1. s1's space must be large enough.
32 / At most n characters are moved.
33 / Returns s1.
35 / Fast assembly language version of the following C-program strncat
36 / which represents the `standard' for the C-library.
38 / char *
39 / strncat(char *s1, const char *s2, size_t n)
40 / {
41 / char *os1 = s1;
43 / n++;
44 / while (*s1++)
45 / ;
46 / --s1;
47 / while (*s1++ = *s2++)
48 / if (--n == 0) {
49 / s1[-1] = '\0';
50 / break;
51 / }
52 / return (os1);
53 / }
55 / In this assembly language version, the following expression is used
56 / to check if a 32-bit word data contains a null byte or not:
57 / (((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
58 / If the above expression generates a value other than 0x80808080,
59 / that means the 32-bit word data contains a null byte.
61 / The above has been extended for 64-bit support.
64 #include "SYS.h"
/*
 * strncat(s1, s2, n): append at most n bytes of s2 to the end of s1,
 * NUL-terminate the result, and return s1.
 *
 * Registers as used below:
 *	%rdi	cursor into s1 (first argument)
 *	%rsi	cursor into s2 (second argument)
 *	%rdx	number of bytes that may still be copied (third argument)
 *	%rax	original s1, kept as the return value
 *	%r8	0x7f7f7f7f7f7f7f7f
 *	%r9	0x8080808080808080 (bitwise complement of %r8)
 *	%r11	quadword or byte currently being examined
 *	%rcx	scratch for the null-byte test
 *
 * A quadword A holds no null byte when
 *	((((A & %r8) + %r8) | A) & %r9) == %r9
 *
 * Phases:
 *	.L1/.L2	find the terminating null of s1 (bytewise until %rdi is
 *		quadword aligned, then one quadword at a time)
 *	.L5	copy whole quadwords while s2 is aligned, n >= 8 and the
 *		quadword holds no null byte
 *	.L7	bytewise tail copy after the quadword loop
 *	.L4	bytewise copy used when s2 is not quadword aligned
 *	.L8	n ran out before a null was copied: terminate explicitly
 *	.L9	common exit
 */
	ENTRY(strncat)			/* (char *, char *, size_t) */
	movq	%rdi, %rax		/ save return value
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ %r8 = 0x7f...
	movq	%r8, %r9
	notq	%r9			/ %r9 = 0x80...
	testq	$7, %rdi		/ if %rdi not quadword aligned
	jnz	.L1			/ goto .L1
	.align	4
.L2:
	movq	(%rdi), %r11		/ move 1 quadword from (%rdi) to %r11
	movq	%r8, %rcx
	andq	%r11, %rcx		/ %rcx = %r11 & 0x7f...
	addq	$8, %rdi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f...
	orq	%r11, %rcx		/ %rcx |= %r11
	andq	%r9, %rcx		/ %rcx &= 0x80...
	cmpq	%r9, %rcx		/ if no null byte in this quadword
	je	.L2			/ goto .L2
	subq	$8, %rdi		/ undo the post-increment
.L1:
	cmpb	$0, (%rdi)		/ if a byte in (%rdi) is null
	je	.L3			/ goto .L3
	incq	%rdi			/ next byte
	testq	$7, %rdi		/ if %rdi not quadword aligned
	jnz	.L1			/ goto .L1
	jmp	.L2			/ goto .L2 (%rdi quadword aligned)
	.align	4
.L3:
	/ %rdi points to a null byte in destination string

	testq	$7, %rsi		/ if %rsi not quadword aligned
	jnz	.L4			/ goto .L4
	cmpq	$8, %rdx		/ if number of bytes < 8
	jb	.L7			/ goto .L7
	.align	4
.L5:
	movq	(%rsi), %r11		/ move 1 quadword from (%rsi) to %r11
	movq	%r8, %rcx
	andq	%r11, %rcx		/ %rcx = %r11 & 0x7f...
	addq	$8, %rsi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f...
	orq	%r11, %rcx		/ %rcx |= %r11
	andq	%r9, %rcx		/ %rcx &= 0x80...
	cmpq	%r9, %rcx		/ if null byte in this quadword
	jne	.L6			/ goto .L6
	movq	%r11, (%rdi)		/ copy this quadword to (%rdi)
	subq	$8, %rdx		/ decrement number of bytes by 8
	addq	$8, %rdi		/ next quadword
	cmpq	$8, %rdx		/ if number of bytes >= 8
	jae	.L5			/ goto .L5
	jmp	.L7			/ goto .L7
.L6:
	subq	$8, %rsi		/ undo the post-increment
	.align	4
.L7:
	/ number of bytes < 8 or a null byte found in the quadword
	cmpq	$0, %rdx		/ if number of bytes == 0
	jz	.L8			/ goto .L8 (finished)
	movb	(%rsi), %r11b		/ %r11b = a byte in (%rsi)
	decq	%rdx			/ decrement number of bytes by 1
	movb	%r11b, (%rdi)		/ copy %r11b to (%rdi)
	incq	%rsi			/ next byte
	incq	%rdi			/ next byte
	cmpb	$0, %r11b		/ compare %r11b with a null byte
	je	.L9			/ if %r11b is a null, goto .L9
	jmp	.L7			/ goto .L7
	.align	4

.L4:
	/ %rsi not aligned
	cmpq	$0, %rdx		/ if number of bytes == 0
	jz	.L8			/ goto .L8 (finished)
	movb	(%rsi), %r11b		/ %r11b = a byte in (%rsi)
	decq	%rdx			/ decrement number of bytes by 1
	movb	%r11b, (%rdi)		/ copy %r11b to (%rdi)
	incq	%rdi			/ next byte
	incq	%rsi			/ next byte
	cmpb	$0, %r11b		/ compare %r11b with a null byte
	je	.L9			/ if %r11b is a null, goto .L9
	jmp	.L4			/ goto .L4
	.align	4
.L8:
	movb	$0, (%rdi)		/ null termination
.L9:
	ret				/ return s1 (saved in %rax at entry)
	SET_SIZE(strncat)