1 /* $NetBSD: bzero.S,v 1.10 2011/01/19 02:47:01 matt Exp $ */
4 * Copyright (C) 2001 Martin J. Laubach <mjl@NetBSD.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 /*----------------------------------------------------------------------*/
31 #include <machine/asm.h>
34 #if defined(LIBC_SCCS) && !defined(lint)
35 __RCSID("$NetBSD: bzero.S,v 1.10 2011/01/19 02:47:01 matt Exp $")
36 #endif /* LIBC_SCCS && !lint */
42 #define USE_STSWX 0 /* don't. slower than trivial copy loop */
44 /*----------------------------------------------------------------------*/
46 void bzero(void *b %r3, size_t len %r4);
47 void * memset(void *b %r3, int c %r4, size_t len %r5);
49 /*----------------------------------------------------------------------*/
/*
 * Entry fragments for bzero and memset.
 * bzero forces the fill value to zero.  memset widens the fill byte to a
 * full word and branches to simple_fill when the fill value is non-zero.
 */
58 li r_val, 0 /* bzero: value to stuff in is always zero */
66 beqlr- cr1 /* Nothing to do: return when cr1 is "equal" (presumably len == 0 -- confirm) */
/* The two rlwimi build a word of four identical bytes; this assumes the */
/* low byte of %r0 already holds the fill byte -- TODO confirm. */
68 rlwimi %r0, %r4, 8, 16, 23 /* word extend fill value: insert (%r4 rotl 8) into bits 16-23 */
69 rlwimi %r0, %r0, 16, 0, 15 /* copy the low halfword into the high halfword */
71 bne- simple_fill /* != 0, use trivial fill */
74 /*----------------------------------------------------------------------*/
76 /* First find out cache line size */
/* Get the address of cache_info into %r5, via the GOT or as an absolute address. */
81 lwz %r5,cache_info@got(%r10) /* GOT lookup; %r10 is presumably the GOT pointer -- confirm */
84 ori %r5,%r5,cache_info@l /* absolute form: OR in the low 16 bits of the address */
/* NOTE(review): the branch below presumably follows a compare of a */
/* cache_info word against its -1 initialiser -- confirm. */
88 bne+ cb_cacheline_known /* expected case: line size already cached */
90 /*----------------------------------------------------------------------*/
/*
 * Ask the system for the cache geometry with sysctl and store it in
 * cache_info.  A stack frame of STKFRAME_SZ bytes holds the saved
 * registers and the oldlen word passed to sysctl by address.
 */
92 #define CPU_CACHELINE 1 /* second MIB component of the older query */
93 #define CPU_CACHEINFO 5 /* second MIB component of the newer query */
95 #define STKFRAME_SZ 64 /* bytes of stack frame used around the sysctl calls */
108 stwu %r1, -STKFRAME_SZ(%r1) /* allocate the frame and store the back chain */
110 stw %r31, R31_SAVE(%r1) /* %r31 is used below, so save it first */
111 mr %r31, %r5 /* cache info: keep &cache_info in %r31 */
114 stw %r30, R30_SAVE(%r1) /* %r30 is handed to PIC_TOCSETUP, so save it first */
115 PIC_TOCSETUP(cb_memset,%r30) /* macro defined elsewhere; presumably sets a PIC base in %r30 */
/* Save the registers holding the fill state; they are reloaded after the calls. */
118 stw %r8, R8_SAVE(%r1)
119 stw %r3, R3_SAVE(%r1)
120 stw %r4, R4_SAVE(%r1)
121 stw %r0, R0_SAVE(%r1)
123 li %r0, CTL_MACHDEP /* Construct MIB: first component */
125 li %r0, CPU_CACHEINFO /* second component */
128 li %r0, 4*4 /* Oldlenp := 4*4, the size of the four words of cache_info */
129 stw %r0, OLDPLEN(%r1)
132 li %r4, 2 /* namelen */
133 /* %r5 already contains &cache_info */
134 addi %r6, %r1, OLDPLEN /* address of the oldlen word in the frame */
137 bl PIC_PLT(_C_LABEL(sysctl))
139 cmpwi %r3, 0 /* Check result: %r3 is the sysctl return value */
142 /* Failure, try older sysctl */
144 li %r0, CTL_MACHDEP /* Construct MIB: first component */
146 li %r0, CPU_CACHELINE /* second component */
149 li %r0, 4 /* Oldlenp := 4, a single word this time */
150 stw %r0, OLDPLEN(%r1)
153 li %r4, 2 /* namelen */
155 addi %r6, %r1, OLDPLEN /* address of the oldlen word in the frame */
158 bl PIC_PLT(_C_LABEL(sysctl))
/* Reload everything saved above and release the frame. */
160 lwz %r3, R3_SAVE(%r1)
161 lwz %r4, R4_SAVE(%r1)
162 lwz %r8, R8_SAVE(%r1)
163 lwz %r0, R0_SAVE(%r1)
165 lwz %r31, R31_SAVE(%r1)
167 lwz %r30, R30_SAVE(%r1)
169 addi %r1, %r1, STKFRAME_SZ /* pop the frame */
173 cntlzw %r6, %r9 /* compute shift value: start from the leading zero count of %r9 */
181 lwz %r6, cache_sh@got(%r10) /* address of cache_sh through the GOT */
185 stw %r5, cache_sh@l(%r6) /* store %r5 into cache_sh (presumably the shift value -- confirm) */
187 /*----------------------------------------------------------------------*/
188 /* Okay, we know the cache line size (%r9) and shift value (%r10) */
/* Via the GOT: fetch the addresses of cache_info and cache_sh. */
191 lwz %r5, cache_info@got(%r10)
193 lwz %r5, cache_sh@got(%r10)
/* Absolute addressing: load the second word of cache_info and cache_sh directly. */
196 lis %r9, cache_info+4@ha
197 lwz %r9, cache_info+4@l(%r9) /* %r9 := word at cache_info+4 */
198 lis %r10, cache_sh@ha
199 lwz %r10, cache_sh@l(%r10) /* %r10 := cache_sh */
/* The loads below read the line size out of a cpu_info structure instead. */
203 #ifdef MULTIPROCESSOR
204 mfsprg %r10, 0 /* Get cpu_info pointer */
206 lis %r10, cpu_info_store@ha /* otherwise use the address of cpu_info_store */
207 addi %r10, %r10, cpu_info_store@l
209 lwz %r9, CPU_CI+4(%r10) /* Load D$ line size */
210 cntlzw %r10, %r9 /* Calculate shift.. starting from the leading zero count */
/*
 * Zero-fill using cache blocks.  The destination is first brought to a
 * word boundary bytewise, then to a cache line boundary wordwise, then
 * whole lines are cleared with dcbz.  Short requests go to simple_fill.
 */
214 /* Back in memory filling business */
216 cmplwi cr1, r_len, 0 /* Nothing to do? */
218 cmplw r_len, %r5 /* < 2*CL bytes to move? */
219 beqlr- cr1 /* then do nothing */
221 blt+ simple_fill /* a trivial fill routine */
223 /* Word align the block, fill bytewise until dst is word aligned */
225 andi. %r5, r_dst, 0x03 /* %r5 := dst & 3, the offset within a word */
227 beq+ cb_aligned_w /* already aligned to word? */
229 subf %r5, %r5, %r6 /* bytes to fill to align4: %r5 := %r6 - %r5 */
233 add r_dst, %r5, r_dst /* advance dst by that many bytes */
238 1: stbu r_val, 1(r_dst) /* Fill bytewise: store at dst+1, then dst := dst+1 */
243 subf r_len, %r5, r_len /* r_len := r_len - %r5 */
245 cb_aligned_w: /* Cache block align, fill wordwise until dst aligned */
247 /* I know I have something to do since we had >= 2*CL initially */
248 /* so no need to check for r_len = 0 */
250 subi %r6, %r9, 1 /* CL mask: line size minus one */
254 beq cb_aligned_cb /* already on CL boundary? */
256 subf %r5, %r5, %r6 /* words to fill to alignment: %r5 := %r6 - %r5 */
259 subf r_len, %r5, r_len /* r_len := r_len - %r5 */
262 1: stwu r_val, 4(r_dst) /* Fill wordwise: store at dst+4, then dst := dst+4 */
266 cb_aligned_cb: /* no need to check r_len, see above */
268 srw. %r5, r_len, %r10 /* Number of cache blocks: r_len shifted right by %r10 */
273 subf r_len, %r5, r_len /* r_len := r_len - %r5 */
275 1: dcbz 0, r_dst /* Clear blockwise: zero the cache block at dst */
276 add r_dst, r_dst, %r9 /* step dst by one cache line */
279 cblocks_done: /* still CL aligned, but less than CL bytes left */
284 blt- sf_bytewise /* <8 remaining? */
287 /*----------------------------------------------------------------------*/
/*
 * Trivial fill, used for short requests and non-zero fill values.
 * It aligns dst to a word bytewise, fills in 8-byte or word steps, then
 * finishes bytewise.  Both exits return the original pointer held in %r8.
 */
291 beqlr- /* Nothing to do */
/* NOTE(review): two back-to-back compares write cr1, so only the second */
/* takes effect as written; presumably they are alternatives selected by */
/* USE_STSWX -- confirm. */
295 cmplwi cr1, r_len, 12 /* < 12 bytes to move? */
297 cmplwi cr1, r_len, 8 /* < 8 bytes to move? */
299 andi. %r5, r_dst, 0x03 /* %r5 := dst & 3, the offset within a word */
300 blt cr1, sf_bytewise /* trivial byte mover */
304 beq+ sf_aligned_w /* dest is word aligned */
309 add r_dst, %r5, r_dst /* advance dst by %r5 bytes */
311 mtctr %r5 /* nope, then fill bytewise: byte count goes into CTR */
312 subi r_dst, r_dst, 1 /* until it is; bias dst because stbu stores at dst+1 */
313 1: stbu r_val, 1(r_dst) /* store one byte, then dst := dst+1 */
318 subf r_len, %r5, r_len /* r_len := r_len - %r5 */
320 sf_aligned_w: /* no need to check r_len since it was >= 8 bytes initially */
328 slwi %r5, %r5, 3 /* adjust len: %r5 := %r5 * 8 */
329 subf. r_len, %r5, r_len /* r_len := r_len - %r5, setting cr0 */
331 1: stswi %r6, r_dst, 8 /* store 8 bytes at dst from %r6 and the following register */
335 srwi %r5, r_len, 2 /* words to fill: r_len / 4 */
339 subf. r_len, %r5, r_len /* adjust len for fill, setting cr0 */
342 1: stwu r_val, 4(r_dst) /* store one word at dst+4, then dst := dst+4 */
347 sf_word_done: bne- sf_bytewise /* cr0 "not equal": go finish bytewise */
349 sf_return: mr %r3, %r8 /* restore orig ptr */
350 blr /* for memset functionality */
364 1: stbu r_val, 1(r_dst) /* bytewise tail: store at dst+1, then dst := dst+1 */
367 mr %r3, %r8 /* restore orig ptr */
368 blr /* for memset functionality */
371 /*----------------------------------------------------------------------*/
/* Four words of cache information, each initialised to -1. */
/* The word at offset 4 is the one loaded as the cache line size. */
374 cache_info: .long -1, -1, -1, -1
378 /*----------------------------------------------------------------------*/