1 /* $NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $ */
4 * Copyright (c) 1996-2002 Eduardo Horvath
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include "strmacros.h"
27 #if defined(LIBC_SCCS) && !defined(lint)
28 RCSID("$NetBSD: memset.S,v 1.2 2013/03/17 02:12:41 christos Exp $")
29 #endif /* LIBC_SCCS and not lint */
33 * XXXXXXXXXXXXXXXXXXXX
34 * We need to make sure that this doesn't use floating point
35 * before our trap handlers are installed or we could panic
36 * XXXXXXXXXXXXXXXXXXXX
39 * memset(addr, c, len)
41 * We want to use VIS instructions if we're clearing out more than
42 * 256 bytes, but to do that we need to properly save and restore the
43 * FP registers. Unfortunately the code to do that in the kernel needs
44 * to keep track of the current owner of the FPU, hence the different
47 * XXXXX To produce more efficient code, we do not allow lengths
48 * greater than or equal to 0x8000000000000000, which are negative numbers.
49 * This should not really be an issue since the VA hole should
50 * cause any such ranges to fail anyway.
52 #if !defined(_KERNEL) || defined(_RUMPKERNEL)
/*
 * Entry register layout and head-alignment steps.  Only some of the
 * instructions of this sequence are visible here (the labels and the
 * delay-slot instructions are not), so the notes stick to what the
 * shown lines do.  The first register line below has no pattern
 * argument -- presumably it belongs to a bzero-style entry; confirm
 * against the full file.  CCCR is not defined in this excerpt
 * (presumably it comes from strmacros.h).
 */
54 ! %o0 = addr, %o1 = len
59 ! %o0 = addr, %o1 = pattern, %o2 = len
60 mov %o0, %o4 ! Save original pointer
63 btst 7, %o0 ! 8-byte aligned? (tests the low 3 address bits)
67 deccc %o2 ! Store up to 7 bytes
68 bge,a,pt CCCR, Lmemset_internal ! Taken while the decremented len is still >= 0
71 retl ! Duplicate Lmemset_done
75 * Duplicate the pattern so it fills 64-bits.
/*
 * Start replicating the fill byte across a 64-bit register.  andcc
 * keeps only the low 8 bits of the pattern and sets the condition
 * codes, so an all-zero byte can be detected (a zero needs no
 * replication).  sllx then forms pattern << 8 in %o3; the instructions
 * that merge it back and repeat for the wider widths are not visible
 * in this excerpt.
 */
77 andcc %o1, 0x0ff, %o1 ! No need to extend zero
79 sllx %o1, 8, %o3 ! sigh. all dependent insns.
/*
 * Dispatch on the remaining length, then store 8 bytes at a time.
 * With USE_BLOCK_STORE_LOAD defined, a length of 256 or more is sent to
 * Lmemset_block (cmp followed by bge, so the threshold is inclusive).
 * The instruction that sets the condition codes for the bl below, and
 * the pointer advance that makes [%o0 - 8] the slot to fill, are not
 * visible in this excerpt.
 */
86 #ifdef USE_BLOCK_STORE_LOAD
87 !! Now we are 64-bit aligned
88 cmp %o2, 256 ! Use block clear if len >= 256
89 bge,pt CCCR, Lmemset_block ! use block store insns
90 #endif /* USE_BLOCK_STORE_LOAD */
93 bl,pn CCCR, Lmemset_cleanup ! Less than 8 bytes left
99 stx %o1, [%o0 - 8] ! Do 1 longword at a time
102 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
103 * -6 => two bytes, etc. Mop up this remainder, if any.
/*
 * Tail handling: store the last 0..7 bytes as at most one 4-byte, one
 * 2-byte and one 1-byte store, selected by bits 2, 1 and 0 of len, then
 * put the original pointer back in %o0.  The stores write %o1, i.e. the
 * fill pattern, not a literal zero.  The instructions that test each
 * bit and the 5:, 7: and Lmemset_done labels are not visible in this
 * excerpt.
 * NOTE(review): the last branch names %icc directly while the others
 * use CCCR -- confirm this is intentional.
 */
107 bz,pt CCCR, 5f ! if (len & 4) {
109 stw %o1, [%o0] ! *(int *)addr = pattern;
110 inc 4, %o0 ! addr += 4;
113 bz,pt CCCR, 7f ! if (len & 2) {
115 sth %o1, [%o0] ! *(short *)addr = pattern;
116 inc 2, %o0 ! addr += 2;
119 bnz,a %icc, Lmemset_done ! if (len & 1)
120 stb %o1, [%o0] ! *addr = pattern;
123 mov %o4, %o0 ! Restore pointer for memset (ugh)
125 #ifdef USE_BLOCK_STORE_LOAD
/*
 * Gate for the block-store path.  The 64-bit variable block_disable is
 * loaded through a sethi/%lo address pair; if it is non-zero we branch
 * to Lmemset_longs instead of using block stores.  The second check
 * starts by loading the address of trapbase into %o5; the instructions
 * that turn it into the %o3 value tested by the final brnz are not
 * visible in this excerpt.
 */
127 sethi %hi(block_disable), %o3
128 ldx [ %o3 + %lo(block_disable) ], %o3
129 brnz,pn %o3, Lmemset_longs
130 !! Make sure our trap table is installed
131 set _C_LABEL(trapbase), %o5
134 brnz,pn %o3, Lmemset_longs ! No, then don't use block load/store
139 * Here we use VIS instructions to do a block clear of a page.
140 * But before we can do that we need to save and enable the FPU.
141 * The last owner of the FPU registers is fplwp, and
142 * fplwp->l_md.md_fpstate is the current fpstate. If that's not
143 * null, call savefpstate() with it to store our current fp state.
145 * Next, allocate an aligned fpstate on the stack. We will properly
146 * nest calls on a particular stack so this should not be a problem.
148 * Now we grab either curlwp (or if we're on the interrupt stack
149 * lwp0). We stash its existing fpstate in a local register and
150 * put our new fpstate in curlwp->l_md.md_fpstate. We point
151 * fplwp at curlwp (or lwp0) and enable the FPU.
153 * If we are ever preempted, our FPU state will be saved in our
154 * fpstate. Then, when we're resumed and we take an FPDISABLED
155 * trap, the trap handler will be able to fish our FPU state out
156 * of curlwp (or lwp0).
158 * On exiting this routine we undo the damage: restore the original
159 * pointer to curlwp->l_md.md_fpstate, clear our fplwp, and disable
/*
 * Align to a block boundary, build the fill pattern in FP registers and
 * store it one block at a time.  The code uses %i registers from here
 * on, so a register-window save has presumably happened in lines not
 * visible in this excerpt (TODO confirm); %i0, %i1 and %i2 appear to
 * carry addr, pattern and len.
 *
 *  - brz has no annul bit, so the fzero in its delay slot always runs:
 *    %f0 is cleared whether or not the branch to 3: is taken.
 *  - fmovsa uses the "always" condition, i.e. it copies %f0 to %f1
 *    unconditionally.
 *  - len is biased by -56 before the loop and decremented by BLOCK_SIZE
 *    after each stda; the loop branch itself is not visible here.
 *    BLOCK_SIZE and ASI_STORE are defined outside this excerpt.
 */
166 !! We are now 8-byte aligned. We need to become 64-byte aligned.
178 brz %i1, 3f ! Skip the memory op
179 fzero %f0 ! if pattern is 0
182 stx %i1, [%i0] ! Flush this puppy to RAM
186 stw %i1, [%i0] ! Flush this puppy to RAM
189 fmovsa %icc, %f0, %f1
193 fmovd %f0, %f2 ! Duplicate the pattern
201 !! Remember: we were 8 bytes too far
202 dec 56, %i2 ! Go one iteration too far
204 stda %f0, [%i0] ASI_STORE ! Store 64 bytes
205 deccc BLOCK_SIZE, %i2
211 * We've saved our possible fpstate, now disable the fpu
212 * and continue with life.
/*
 * Leave the block-store path: add back the 56 that was subtracted from
 * the count before the block loop (addcc also sets the condition
 * codes) and jump to Lmemset_longs to store whatever is left.
 */
215 addcc %i2, 56, %i2 ! Restore the count
216 ba,pt %xcc, Lmemset_longs ! Finish up the remainder
218 #endif /* USE_BLOCK_STORE_LOAD */