4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2009, Intel Corporation
24 * All rights reserved.
28 * strlen - calculate the length of a string
32 #include "proc64_id.h"
34 #define LABEL(s) .strlen##s
37 * This implementation uses SSE instructions to compare up to 16 bytes
38 * at a time, looking for the end of the string (the terminating null char).
40 ENTRY
(strlen
) /* (const char *s) */
41 mov
%rdi
, %rsi
/* keep original %rdi value */
43 pxor
%xmm0
, %xmm0
/* 16 null chars */
45 jz LABEL
(align16_loop
) /* string is 16 byte aligned */
48 * Unaligned case. Round down to 16-byte boundary before comparing
49 * 16 bytes for a null char. The code then compensates for any extra chars
50 * preceding the start of the string.
53 and $
0xfffffffffffffff0, %rsi
59 shr
%cl
, %edx
/* Compensate for bytes preceding the string */
62 sub %rcx
, %rsi
/* no null, adjust to next 16-byte boundary */
63 pxor
%xmm0
, %xmm0
/* clear xmm0, may have been changed... */
66 LABEL
(align16_loop
): /* 16 byte aligned */
67 pcmpeqb
(%rsi
), %xmm0
/* look for null bytes */
68 pmovmskb
%xmm0
, %edx
/* move each byte mask of %xmm0 to edx */
70 add $
16, %rsi
/* prepare to search next 16 bytes */
71 test
%edx
, %edx
/* if no null byte, %edx must be 0 */
72 jnz LABEL
(exit
) /* found a null */
90 jz LABEL
(align16_loop
)
96 * Check to see if BSF is fast on this processor. If not, use a different
97 * exit tail to find first bit set indicating null byte match.
99 testl $USE_BSF
, .memops_method(%rip)
102 lea
-16(%rdi
, %rsi
), %rax
/* calculate exact offset */
103 bsf
%edx
, %ecx
/* Least significant 1 bit is index of null */
104 lea
(%rax
, %rcx
),%rax
108 * This exit tail does not use the bsf instruction.
112 lea
-16(%rdi
, %rsi
), %rax
116 jnz LABEL
(exit_tail0
)
119 jnz LABEL
(exit_tail1
)
123 jnz LABEL
(exit_tail2
)
126 jnz LABEL
(exit_tail3
)
129 jnz LABEL
(exit_tail4
)
132 jnz LABEL
(exit_tail5
)
135 jnz LABEL
(exit_tail6
)
143 jnz LABEL
(exit_tail0
)
146 jnz LABEL
(exit_tail1
)
149 jnz LABEL
(exit_tail2
)
152 jnz LABEL
(exit_tail3
)
155 jnz LABEL
(exit_tail4
)
158 jnz LABEL
(exit_tail5
)
161 jnz LABEL
(exit_tail6
)