import less(1)
[unleashed/tickless.git] / usr / src / lib / libc / amd64 / gen / strcat.s
blobbc9c1d99edce2618727b4af9f1432d916ed8b886
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 .file "strcat.s"
29 / strcat(s1, s2)
31 / Concatenates s2 on the end of s1. s1's space must be large enough.
32 / Returns s1.
34 / Fast assembly language version of the following C-program strcat
35 / which represents the `standard' for the C-library.
37 / char *
38 / strcat(char *s1, const char *s2)
39 / {
40 / char *os1 = s1;
42 / while (*s1++)
43 / ;
44 / --s1;
45 / while (*s1++ = *s2++)
46 / ;
47 / return (os1);
48 / }
50 / In this assembly language version, the following expression is used
51 / to check if a 32-bit word data contains a null byte or not:
52 / (((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
53 / If the above expression generates a value other than 0x80808080,
54 / that means the 32-bit word data contains a null byte.
56 / The above has been extended for 64-bit support.
59 #include "SYS.h"
61 ENTRY(strcat) /* (char *s1, char *s2) */
62 / find a null byte in destination string
63 movq %rdi,%rax / prepare return value (s1)
64 movabsq $0x7f7f7f7f7f7f7f7f, %r8 / %r8 = 0x7f...
65 movq %r8, %r9
66 notq %r9 / %r9 = 0x80...
67 testq $7, %rdi / if %rdi not quadword aligned
68 jnz .L1 / goto .L1 (scan byte by byte)
69 .align 4
70 .L2:
71 movq (%rdi), %rdx / move 1 quadword from (%rdi) to %rdx
72 movq %r8, %rcx
73 andq %rdx, %rcx / %rcx = %rdx & 0x7f7f7f7f7f7f7f7f
74 addq $8, %rdi / next quadword
75 addq %r8, %rcx / %rcx += 0x7f7f7f7f7f7f7f7f
76 orq %rdx, %rcx / %rcx |= %rdx
77 andq %r9, %rcx / %rcx &= 0x8080808080808080
78 cmpq %r9, %rcx / if no null byte in this quadword
79 je .L2 / goto .L2
80 subq $8, %rdi / undo the post-increment; rescan this quadword bytewise
81 .L1:
82 cmpb $0, (%rdi) / if a byte in (%rdi) is null
83 je .L3 / goto .L3
84 incq %rdi / next byte
85 testq $7, %rdi / if %rdi not quadword aligned
86 jnz .L1 / goto .L1
87 jmp .L2 / goto .L2 (%rdi quadword aligned)
88 .align 4
89 .L3:
90 / %rdi points to a null byte in destination string
91 testq $7, %rsi / if %rsi not quadword aligned
92 jnz .L4 / goto .L4 (copy byte by byte)
93 .align 4
94 .L5:
95 movq (%rsi), %rdx / move 1 quadword from (%rsi) to %rdx
96 movq %r8, %rcx
97 andq %rdx, %rcx / %rcx = %rdx & 0x7f7f7f7f7f7f7f7f
98 addq $8, %rsi / next quadword
99 addq %r8, %rcx / %rcx += 0x7f7f7f7f7f7f7f7f
100 orq %rdx, %rcx / %rcx |= %rdx
101 andq %r9, %rcx / %rcx &= 0x8080808080808080
102 cmpq %r9, %rcx / if null byte in this quadword
103 jne .L7 / goto .L7
104 movq %rdx, (%rdi) / copy this quadword to (%rdi)
105 addq $8, %rdi / next quadword
106 jmp .L5 / goto .L5
107 .L7:
108 subq $8, %rsi / undo the post-increment; copy this quadword bytewise
109 .align 4
110 .L4:
111 movb (%rsi), %dl / %dl = a byte in (%rsi)
112 cmpb $0, %dl / compare %dl with a null byte
113 movb %dl, (%rdi) / copy %dl to (%rdi); movb leaves the flags intact
114 je .L6 / if %dl is a null, goto .L6
115 incq %rsi / next byte
116 incq %rdi / next byte
117 testq $7, %rsi / if %rsi not quadword aligned
118 jnz .L4 / goto .L4
119 jmp .L5 / goto .L5 (%rsi quadword aligned)
120 .align 4
121 .L6:
122 ret / terminating null copied; return s1 (in %rax since entry)
123 SET_SIZE(strcat)