4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
29 #include <sys/asm_linkage.h>
31 ANSI_PRAGMA_WEAK
(memset
,function
)
34 pushl
%edi
/ save register variable
35 movl
8(%esp
),%edi
/ %edi
= string address
36 movl
12(%esp
),%eax
/ %al
= byte to duplicate
37 movl
16(%esp
),%ecx
/ %ecx
= number of copies
39 / For all basic blocks in this routine, maintain the following
40 / entry conditions: %eax has each byte set to the desired byte.
41 / NOTE: .byteset doesn't require this
42 / %ecx contains # bytes to set
43 / %edi contains address to set
45 cld / make sure we go the right way...
46 cmpl $20,%ecx / strings with fewer than 20 chars should be byte set
49 andl $0xff, %eax / trim anything above low byte
50 imul $0x01010101, %eax / extend low byte to each byte
52 cmpl $256, %ecx / smaller areas don't benefit from alignment
55 cmpl $
511, %ecx
/ areas smaller than this should
be wordset
59 / prep work for sse temporal
and non-temporal
62 pushl
%ebx
/ more registers are needed
63 pushl
%esi
/ for alignment work
66 / align address to
64 byte boundaries.
69 movl
%ecx
, %ebx
/ save byte count
70 movl
%edi
, %esi
/ esi is scratch register
71 andl $
63, %esi
/ bytes to align to
64 byte align addr
72 neg %esi
/ compute count of bytes
73 addl $
64, %esi
/ needed to align
74 andl $
63, %esi
/ to
64 byte align addr
75 jz
.sse_aligned / skip alignment if not needed
76 subl
%esi
, %ebx
/ ebx contains remainder of bytes to set
77 movl
%esi
, %ecx
/ alignment bytes
78 shrl $
2,%ecx
/ %ecx
= number of words to set
81 andl $
3,%ecx
/ %ecx
= number of bytes left
83 movl
%ebx
, %ecx
/ remainder to
be set
87 shr $
6, %ecx
/ number of
64 byte blocks to set
90 / load xmm0 with bytes to
be set
92 subl $
16,%esp
/ give ourselves some working room on the stack
93 movl
%eax
,(%esp
) / copy eax into each of
4 bytes
94 movl
%eax
,4(%esp
) / avoid pushl since it causes more interlocking
97 movups
(%esp
), %xmm0
/ unaligned load from stack into xmm0
98 addl $
16,%esp
/ restore stack position
100 cmpl $
262143, %ebx
/ blocks smaller than this allocate in the cache
102 jmp
.sse_nt_loop / branch across alignment nops
107 movntps
%xmm0
, (%edi
) / block non-temporal store
108 movntps
%xmm0
, 16(%edi
) / use sse rather than sse2
109 movntps
%xmm0
, 32(%edi
) / so we work more places
110 movntps
%xmm0
, 48(%edi
) /
112 addl $
64, %edi
/ increment dest address
113 dec %ecx
/ dec count of blocks
114 jnz
.sse_nt_loop / jump if not done
116 andl $
63, %ebx
/ remainder of bytes to set
117 movl
%ebx
, %ecx
/ ecx contains remainder of bytes to set
118 popl
%esi
/ restore stack config
120 #if defined(_SSE2_INSN)
122 #elif defined(_SSE_INSN)
125 #error "Must have either SSE or SSE2"
127 cmpl $
20, %ecx
/ compare
and jump accordingly
133 movaps
%xmm0
, (%edi
) / block copy w
/ SSE
134 movaps
%xmm0
, 16(%edi
)
135 movaps
%xmm0
, 32(%edi
)
136 movaps
%xmm0
, 48(%edi
)
138 addl $
64, %edi
/ increment addr
139 dec %ecx
/ dec count of blocks
140 jnz
.sse_loop / jump if not done
142 andl $
63, %ebx
/ remainder of bytes to set
143 movl
%ebx
, %ecx
/ in
%ecx as normal
144 popl
%esi
/ restore stack config
151 movl
%edi
, %edx
/ save current store ptr
152 andl $
7, %edi
/ check alignment
153 movl
%edx
,%edi
/ %edi
= string address
158 pushl
%ebx
/ more registers are needed
167 subl
%esi
, %ebx
/ ebx contains remainder of bytes to set
171 popl
%esi
/ restore stack config
175 movl
%ecx
, %edx
/ save count
176 shrl $
2,%ecx
/ %ecx
= number of words to set
179 andl $
3,%ecx
/ %ecx
= number of bytes left
183 movl
8(%esp
),%eax
/ return string address
184 popl
%edi
/ restore register variable