Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / lib / libkern / arch / hppa / bcopy.S
blobbedf8cccd1c94abc1b568c85f609407b16f51b9c
1 /*      $NetBSD: bcopy.S,v 1.9 2009/12/01 09:06:17 skrll Exp $  */
3 /*
4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matthew Fredette.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
33  * Copy routines for NetBSD/hppa.
34  */
36 #undef _LOCORE
37 #define _LOCORE /* XXX fredette - unfortunate */
39 #include <machine/asm.h>
40 #include <machine/frame.h>
41 #include <machine/reg.h>
43 #if defined(LIBC_SCCS) && !defined(lint)
44 RCSID("$NetBSD: bcopy.S,v 1.9 2009/12/01 09:06:17 skrll Exp $")
45 #endif /* LIBC_SCCS and not lint */
48  * The stbys instruction is a little asymmetric.  When (%r2 & 3)
49  * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma.  You
50  * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
51  * worked like stws,mb.  But it doesn't.
52  *
53  * This macro works around this problem.  It requires that %t2
54  * hold the number of bytes that will be written by this store
55  * (meaning that it ranges from one to four).
56  *
57  * Watch the delay-slot trickery here.  The comib is used to set
58  * up which instruction, either the stws or the stbys, is run
59  * in the delay slot of the b instruction.
60  */
/*
 * _STBYS_E_M(r, dst_spc, dst_off): end-of-word store used by the
 * reverse copier.  Requires %t2 = number of bytes this store writes
 * (one to four).
 *   %t2 == 4: the comib is not taken, so the stws,mb executes in the
 *             delay slot of the "b" and the stbys,e,m is skipped.
 *   %t2 != 4: the comib is taken (with the "b" in its delay slot) and
 *             lands on the stbys,e,m, which therefore is the
 *             instruction run in the delay slot of the "b".
 */
61 #define _STBYS_E_M(r, dst_spc, dst_off)                           \
62         comib,<>        4, %t2, 4                               ! \
63         b               4                                       ! \
64         stws,mb         r, -4(dst_spc, dst_off)                 ! \
65         stbys,e,m       r, 0(dst_spc, dst_off)
68  * This macro does a bulk copy with no shifting.  cmplt and m are
69  * the completer and displacement multiplier, respectively, for
70  * the load and store instructions.
71  */
/*
 * _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m)
 *
 *   src_spc/src_off  space register and offset register of the source
 *   dst_spc/dst_off  space register and offset register of the destination
 *   count            register holding the number of bytes still to store
 *   cmplt            completer applied to every load and store (callers
 *                    in this file pass ma with m = 1, or mb with m = -1)
 *   m                displacement multiplier (displacements are m*4, m*1)
 *
 * Uses %t1-%t4 as scratch.  Every label is generated through _LABEL(),
 * which the caller must define before expanding this macro.
 * _LABEL(_do1) doubles as the entry point of the byte-at-a-time loop for
 * the other copy macros, and the expansion ends by branching to
 * _LABEL(_done), which the caller must supply.
 */
72 #define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
73                                                                 ! \
74         /*                                                      ! \
75          * Loop storing 16 bytes at a time.  Since count        ! \
76          * may be > INT_MAX, we have to be careful and          ! \
77          * avoid comparisons that treat it as a signed          ! \
78          * quantity, until after this loop, when count          ! \
79          * is guaranteed to be less than 16.                    ! \
80          */                                                     ! \
81         comib,>>=,n     15, count, _LABEL(_skip16)              ! \
82 .label _LABEL(_loop16)                                          ! \
83         addi            -16, count, count                       ! \
84         ldws,cmplt      m*4(src_spc, src_off), %t1              ! \
85         ldws,cmplt      m*4(src_spc, src_off), %t2              ! \
86         ldws,cmplt      m*4(src_spc, src_off), %t3              ! \
87         ldws,cmplt      m*4(src_spc, src_off), %t4              ! \
88         stws,cmplt      %t1, m*4(dst_spc, dst_off)              ! \
89         stws,cmplt      %t2, m*4(dst_spc, dst_off)              ! \
90         stws,cmplt      %t3, m*4(dst_spc, dst_off)              ! \
91         comib,<<        15, count, _LABEL(_loop16)              ! \
92         stws,cmplt      %t4, m*4(dst_spc, dst_off)              ! \
93 .label _LABEL(_skip16)                                          ! \
94                                                                 ! \
95         /* Loop storing 4 bytes at a time. */                   ! \
96         addib,<,n       -4, count, _LABEL(_skip4)               ! \
97 .label _LABEL(_loop4)                                           ! \
98         ldws,cmplt      m*4(src_spc, src_off), %t1              ! \
99         addib,>=        -4, count, _LABEL(_loop4)               ! \
100         stws,cmplt      %t1, m*4(dst_spc, dst_off)              ! \
101 .label _LABEL(_skip4)                                           ! \
102         /* Undo the final -4 so count is the true remainder. */ ! \
103         addi            4, count, count                         ! \
104                                                                 ! \
105 .label _LABEL(_do1)                                             ! \
106                                                                 ! \
107         /* Loop storing 1 byte at a time. */                    ! \
108         addib,<,n       -1, count, _LABEL(_skip1)               ! \
109 .label _LABEL(_loop1)                                           ! \
110         ldbs,cmplt      m*1(src_spc, src_off), %t1              ! \
111         addib,>=        -1, count, _LABEL(_loop1)               ! \
112         stbs,cmplt      %t1, m*1(dst_spc, dst_off)              ! \
113 .label _LABEL(_skip1)                                           ! \
114         /* Undo the final -1 (count ends at 0) and finish. */   ! \
115         b               _LABEL(_done)                           ! \
116         addi            1, count, count
119  * This macro is definitely strange.  It exists purely to
120  * allow the _COPYS macro to be reused, but because it
121  * requires this long attempt to explain it, I'm starting
122  * to doubt the value of that.
124  * Part of the expansion of the _COPYS macro below are loops
125  * that copy four words or one word at a time, performing shifts
126  * to get data to line up correctly in the destination buffer.
128  * The _COPYS macro is used when copying backwards, as well
129  * as forwards.  The 4-word loop always loads into %t1, %t2, %t3,
130  * and %t4 in that order.  This means that when copying forward,
131  * %t1 will have the word from the lowest address, and %t4 will
132  * have the word from the highest address.  When copying
133  * backwards, the opposite is true.
135  * The shift instructions need pairs of registers with adjacent
136  * words, with the register containing the word from the lowest
137  * address *always* coming first.  It is this asymmetry that
138  * gives rise to this macro - depending on which direction
139  * we're copying in, these ordered pairs are different.
141  * Fortunately, we can compute those register numbers at compile
142  * time, and assemble them manually into a shift instruction.
143  * That's what this macro does.
145  * This macro takes two arguments.  n ranges from 0 to 3 and
146  * is the "shift number", i.e., n = 0 means we're doing the
147  * shift for what will be the first store.
149  * m is the displacement multiplier from the _COPYS macro call.
150  * This is 1 for a forward copy and -1 for a backwards copy.
151  * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
152  * 1 for a forward copy, and the ((m - 1) / 2) term yields
153  * 0 for a forward copy, and -1 for a backwards copy.
154  * These terms are used to discriminate the register computations
155  * below.
157  * When copying forward, then, the first register used with
158  * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4,
159  * which matches _COPYS' requirement that the word last loaded
160  * be in %t4.  The first register used for the second vshd
161  * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1.
162  * And so on to %t2 and %t3.
164  * When copying forward, the second register used with the first
165  * vshd will be 19 + (3 - ((0 + 0) & 3)), or %t1.  It will
166  * continue to be %t2, then %t3, and finally %t4.
168  * When copying backwards, the values for the first and second
169  * register for each vshd are reversed from the forwards case.
170  * (Symmetry reclaimed!)  Proving this is "left as an exercise
171  * for the reader" (remember the different discriminating values!)
172  */
/*
 * _VSHD(n, m, t): emit one hand-encoded shift instruction as a raw .word.
 *   n  shift number, 0 to 3
 *   m  1 for a forward copy, -1 for a backwards copy
 *   t  number of the target register, OR-ed into the low bits unshifted
 *      (_COPYS passes 1 for %r1 and 22/21/20 for %t1/%t2/%t3)
 * Both computed register numbers are always in 19..22 (%t4..%t1); one is
 * placed at bit 16 and the other at bit 21 of the word.
 * NOTE(review): 0xd0000000 is presumably the vshd opcode pattern --
 * confirm against the instruction set reference.
 */
173 #define _VSHD(n, m, t)                                            \
174         .word (0xd0000000                                       | \
175         ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16)      | \
176         ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21)      | \
177         (t))
180  * This macro does a bulk copy with shifting.  cmplt and m are
181  * the completer and displacement multiplier, respectively, for
182  * the load and store instructions.  It is assumed that the
183  * word last loaded is already in %t4.
184  */
/*
 * _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m)
 *
 * Takes the same arguments as _COPY.  On entry the caller must already
 * have set the shift amount in %cr11 and left the most recently loaded
 * source word in %t4.  Uses %r1 and %t1-%t4 as scratch.  The expansion
 * finishes by branching to _LABEL(_do1) (the byte loop emitted by _COPY
 * under the same _LABEL) after backing src_off up so that the byte loop
 * can copy whatever count still covers.
 */
185 #define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
186                                                                 ! \
187         /*                                                      ! \
188          * Loop storing 16 bytes at a time.  Since count        ! \
189          * may be > INT_MAX, we have to be careful and          ! \
190          * avoid comparisons that treat it as a signed          ! \
191          * quantity, until after this loop, when count          ! \
192          * is guaranteed to be less than 16.                    ! \
193          */                                                     ! \
194         comib,>>=,n     15, count, _LABEL(S_skip16)             ! \
195 .label _LABEL(S_loop16)                                         ! \
196         addi            -16, count, count                       ! \
197         ldws,cmplt      m*4(src_spc, src_off), %t1              ! \
198         ldws,cmplt      m*4(src_spc, src_off), %t2              ! \
199         ldws,cmplt      m*4(src_spc, src_off), %t3              ! \
200         _VSHD(0, m, 1)  /* vshd %t4, %t1, %r1 */                ! \
201         ldws,cmplt      m*4(src_spc, src_off), %t4              ! \
202         _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */                ! \
203         _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */                ! \
204         _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */                ! \
205         stws,cmplt      %r1, m*4(dst_spc, dst_off)              ! \
206         stws,cmplt      %t1, m*4(dst_spc, dst_off)              ! \
207         stws,cmplt      %t2, m*4(dst_spc, dst_off)              ! \
208         comib,<<        15, count, _LABEL(S_loop16)             ! \
209         stws,cmplt      %t3, m*4(dst_spc, dst_off)              ! \
210 .label _LABEL(S_skip16)                                         ! \
211                                                                 ! \
212         /* Loop storing 4 bytes at a time. */                   ! \
213         addib,<,n       -4, count, _LABEL(S_skip4)              ! \
214 .label _LABEL(S_loop4)                                          ! \
215         ldws,cmplt      m*4(src_spc, src_off), %t1              ! \
216         _VSHD(0, m, 1)  /* into %r1 (1) */                      ! \
217         copy            %t1, %t4                                ! \
218         addib,>=        -4, count, _LABEL(S_loop4)              ! \
219         stws,cmplt      %r1, m*4(dst_spc, dst_off)              ! \
220 .label _LABEL(S_skip4)                                          ! \
221                                                                 ! \
222         /*                                                      ! \
223          * We now need to "back up" src_off by the              ! \
224          * number of bytes remaining in the FIFO                ! \
225          * (i.e., the number of bytes remaining in %t4),        ! \
226          * because (the correct) count still includes           ! \
227          * these bytes, and we intend to keep it that           ! \
228          * way, and finish with the single-byte copier.         ! \
229          *                                                      ! \
230          * The number of bytes remaining in the FIFO is         ! \
231          * related to the shift count, so recover it,           ! \
232          * restoring the correct count at the same time.        ! \
233          */                                                     ! \
234         mfctl   %cr11, %t1                                      ! \
235         addi    4, count, count                                 ! \
236         shd     %r0, %t1, 3, %t1                                ! \
237                                                                 ! \
238         /*                                                      ! \
239          * If we're copying forward, the shift count            ! \
240          * is the number of bytes remaining in the              ! \
241          * FIFO, and we want to subtract it from src_off.       ! \
242          * If we're copying backwards, (4 - shift count)        ! \
243          * is the number of bytes remaining in the FIFO,        ! \
244          * and we want to add it to src_off.                    ! \
245          *                                                      ! \
246          * We observe that x + (4 - y) = x - (y - 4),           ! \
247          * and introduce this instruction to add -4 when        ! \
248          * m is -1, although this does mean one extra           ! \
249          * instruction in the forward case.                     ! \
250          */                                                     ! \
251         addi    4*((m - 1) / 2), %t1, %t1                       ! \
252                                                                 ! \
253         /* Now branch to the byte-at-a-time loop. */            ! \
254         b       _LABEL(_do1)                                    ! \
255         sub     src_off, %t1, src_off
258  * This macro copies a region in the forward direction.
259  */
/*
 * _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)
 *
 * Copies count bytes from (src_spc, src_off) to (dst_spc, dst_off),
 * lowest address first.  Structure of the expansion:
 *   - fewer than 8 bytes: straight to the byte loop at _LABEL(_do1);
 *   - zero byte shift between source and destination: first word via
 *     stbys,b,m, then _COPY with ma/1;
 *   - otherwise (_LABEL(_shifting)): prime the two-word FIFO in %t3/%t4,
 *     set the shift amount in %cr11, store the first word, then _COPYS
 *     with ma/1.
 * Clobbers %r1 and %t1-%t4; src_off and count are modified (dst_off too,
 * through the modifying stores).  The caller must define _LABEL() and
 * supply _LABEL(_done).
 */
260 #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)  \
261                                                                 ! \
262         /*                                                      ! \
263          * Since in the shifting-left case we will              ! \
264          * load 8 bytes before checking count, to               ! \
265          * keep things simple, branch to the byte               ! \
266          * copier unless we're copying at least 8.              ! \
267          */                                                     ! \
268         comib,>>,n      8, count, _LABEL(_do1)                  ! \
269                                                                 ! \
270         /*                                                      ! \
271          * Once we 4-byte align the source offset,              ! \
272          * figure out how many bytes from the region            ! \
273          * will be in the first 4-byte word we read.            ! \
274          * Ditto for writing the destination offset.            ! \
275          */                                                     ! \
276         extru           src_off, 31, 2, %t1                     ! \
277         extru           dst_off, 31, 2, %t2                     ! \
278         subi            4, %t1, %t1                             ! \
279         subi            4, %t2, %t2                             ! \
280                                                                 ! \
281         /*                                                      ! \
282          * Calculate the byte shift required.  A                ! \
283          * positive value means a source 4-byte word            ! \
284          * has to be shifted to the right to line up            ! \
285          * as a destination 4-byte word.                        ! \
286          */                                                     ! \
287         sub             %t1, %t2, %t1                           ! \
288                                                                 ! \
289         /* 4-byte align src_off. */                             ! \
290         depi            0, 31, 2, src_off                       ! \
291                                                                 ! \
292         /*                                                      ! \
293          * It's somewhat important to note that this            ! \
294          * code thinks of count as "the number of bytes         ! \
295          * that haven't been stored yet", as opposed to         ! \
296          * "the number of bytes that haven't been copied        ! \
297          * yet".  The distinction is subtle, but becomes        ! \
298          * apparent at the end of the shifting code, where      ! \
299          * we "back up" src_off to correspond to count,         ! \
300          * as opposed to flushing the FIFO.                     ! \
301          *                                                      ! \
302          * We calculated above how many bytes our first         ! \
303          * store will store, so update count now.               ! \
304          *                                                      ! \
305          * If the shift is zero, strictly as an optimization    ! \
306          * we use a copy loop that does no shifting.            ! \
307          */                                                     ! \
308         comb,<>         %r0, %t1, _LABEL(_shifting)             ! \
309         sub             count, %t2, count                       ! \
310                                                                 ! \
311         /* Load and store the first word. */                    ! \
312         ldws,ma         4(src_spc, src_off), %t4                ! \
313         stbys,b,m       %t4, 4(dst_spc, dst_off)                ! \
314                                                                 ! \
315         /* Do the rest of the copy. */                          ! \
316         _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1)       ! \
317                                                                 ! \
318 .label _LABEL(_shifting)                                        ! \
319                                                                 ! \
320         /*                                                      ! \
321          * If shift < 0, we need to shift words to the          ! \
322          * left.  Since we can't do this directly, we           ! \
323          * adjust the shift so it's a shift to the right        ! \
324          * and load the first word into the high word of        ! \
325          * the FIFO.  Otherwise, we load a zero into the        ! \
326          * high word of the FIFO.                               ! \
327          */                                                     ! \
328         comb,<=         %r0, %t1, _LABEL(_shiftingrt)           ! \
329         copy            %r0, %t3                                ! \
330         addi            4, %t1, %t1                             ! \
331         ldws,ma         4(src_spc, src_off), %t3                ! \
332 .label _LABEL(_shiftingrt)                                      ! \
333                                                                 ! \
334         /*                                                      ! \
335          * Turn the shift byte count into a bit count,          ! \
336          * load the next word, set the Shift Amount             ! \
337          * Register, and form and store the first word.         ! \
338          */                                                     ! \
339         sh3add          %t1, %r0, %t1                           ! \
340         ldws,ma         4(src_spc, src_off), %t4                ! \
341         mtctl           %t1, %cr11                              ! \
342         vshd            %t3, %t4, %r1                           ! \
343         stbys,b,m       %r1, 4(dst_spc, dst_off)                ! \
344                                                                 ! \
345         /* Do the rest of the copy. */                          ! \
346         _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
348 /* This macro copies a region in the reverse direction. */
/*
 * _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)
 *
 * Copies count bytes, highest address first.  src_off and dst_off are
 * first advanced by count, and every load and store after that uses the
 * mb completer with negative displacements.  Structure of the expansion:
 *   - fewer than 8 bytes: straight to the byte loop at _LABEL(_do1);
 *   - zero byte shift: first word via _STBYS_E_M, then _COPY with mb/-1;
 *   - otherwise (_LABEL(_shifting)): prime the FIFO in %t4/%t3, set the
 *     shift amount in %cr11, store the first word via _STBYS_E_M, then
 *     _COPYS with mb/-1.
 * %t2 holds the byte count of the first store, as _STBYS_E_M requires.
 * Clobbers %r1 and %t1-%t4.  The caller must define _LABEL() and supply
 * _LABEL(_done).
 */
349 #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)  \
350                                                                 ! \
351         /* Immediately add count to both offsets. */            ! \
352         add     src_off, count, src_off                         ! \
353         add     dst_off, count, dst_off                         ! \
354                                                                 ! \
355         /*                                                      ! \
356          * Since in the shifting-right case we                  ! \
357          * will load 8 bytes before checking                    ! \
358          * count, to keep things simple, branch                 ! \
359          * to the byte copier unless we're                      ! \
360          * copying at least 8 bytes.                            ! \
361          */                                                     ! \
362         comib,>>,n      8, count, _LABEL(_do1)                  ! \
363                                                                 ! \
364         /*                                                      ! \
365          * Once we 4-byte align the source offset,              ! \
366          * figure out how many bytes from the region            ! \
367          * will be in the first 4-byte word we read.            ! \
368          * Ditto for writing the destination offset.            ! \
369          */                                                     ! \
370         extru,<>        src_off, 31, 2, %t1                     ! \
371         ldi             4, %t1                                  ! \
372         extru,<>        dst_off, 31, 2, %t2                     ! \
373         ldi             4, %t2                                  ! \
374                                                                 ! \
375         /*                                                      ! \
376          * Calculate the byte shift required.  A                ! \
377          * positive value means a source 4-byte                 ! \
378          * word has to be shifted to the right to               ! \
379          * line up as a destination 4-byte word.                ! \
380          */                                                     ! \
381         sub             %t2, %t1, %t1                           ! \
382                                                                 ! \
383         /*                                                      ! \
384          * 4-byte align src_off, leaving it pointing            ! \
385          * to the 4-byte word *after* the next word             ! \
386          * we intend to load.                                   ! \
387          *                                                      ! \
388          * It's somewhat important to note that this            ! \
389          * code thinks of count as "the number of bytes         ! \
390          * that haven't been stored yet", as opposed to         ! \
391          * "the number of bytes that haven't been copied        ! \
392          * yet".  The distinction is subtle, but becomes        ! \
393          * apparent at the end of the shifting code, where      ! \
394          * we "back up" src_off to correspond to count,         ! \
395          * as opposed to flushing the FIFO.                     ! \
396          *                                                      ! \
397          * We calculated above how many bytes our first         ! \
398          * store will store, so update count now.               ! \
399          *                                                      ! \
400          * If the shift is zero, we use a copy loop that        ! \
401          * does no shifting.  NB: unlike the forward case,      ! \
402          * this is NOT strictly an optimization.  If the        ! \
403          * SAR is zero the vshds do NOT do the right thing.     ! \
404          * This is another asymmetry more or less the "fault"   ! \
405          * of vshd.                                             ! \
406          */                                                     ! \
407         addi            3, src_off, src_off                     ! \
408         sub             count, %t2, count                       ! \
409         comb,<>         %r0, %t1, _LABEL(_shifting)             ! \
410         depi            0, 31, 2, src_off                       ! \
411                                                                 ! \
412         /* Load and store the first word. */                    ! \
413         ldws,mb         -4(src_spc, src_off), %t4               ! \
414         _STBYS_E_M(%t4, dst_spc, dst_off)                       ! \
415                                                                 ! \
416         /* Do the rest of the copy. */                          ! \
417         _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1)      ! \
418                                                                 ! \
419 .label _LABEL(_shifting)                                        ! \
420                                                                 ! \
421         /*                                                      ! \
422          * If shift < 0, we need to shift words to the          ! \
423          * left.  Since we can't do this directly, we           ! \
424          * adjust the shift so it's a shift to the right        ! \
425          * and load a zero into the low word of the FIFO.       ! \
426          * Otherwise, we load the first word into the           ! \
427          * low word of the FIFO.                                ! \
428          *                                                      ! \
429          * Note the nullification trickery here.  We            ! \
430          * assume that we're shifting to the left, and          ! \
431          * load zero into the low word of the FIFO.  Then       ! \
432          * we nullify the addi if we're shifting to the         ! \
433          * right.  If the addi is not nullified, we are         ! \
434          * shifting to the left, so we nullify the load.        ! \
435          * (The comb is taken only when shifting right.)        ! \
436          */                                                     ! \
437         copy            %r0, %t3                                ! \
438         comb,<=,n       %r0, %t1, 0                             ! \
439         addi,tr         4, %t1, %t1                             ! \
440         ldws,mb         -4(src_spc, src_off), %t3               ! \
441                                                                 ! \
442         /*                                                      ! \
443          * Turn the shift byte count into a bit count,          ! \
444          * load the next word, set the Shift Amount             ! \
445          * Register, and form and store the first word.         ! \
446          */                                                     ! \
447         sh3add          %t1, %r0, %t1                           ! \
448         ldws,mb         -4(src_spc, src_off), %t4               ! \
449         mtctl           %t1, %cr11                              ! \
450         vshd            %t4, %t3, %r1                           ! \
451         _STBYS_E_M(%r1, dst_spc, dst_off)                       ! \
452                                                                 ! \
453         /* Do the rest of the copy. */                          ! \
454         _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
457  * For paranoia, when things aren't going well, enable this
458  * code to assemble byte-at-a-time-only copying.
459  */
460 #if 1
/*
 * The condition above is the constant 1, so the byte-at-a-time
 * definitions below replace the word-oriented _COPY_FORWARD and
 * _COPY_REVERSE macros defined earlier in this file.  Both versions
 * use only %r1 as scratch and need _LABEL(_done) from the caller.
 */
461 #undef _COPY_FORWARD
/*
 * Forward byte loop: leave immediately when count is zero, otherwise
 * load a byte, decrement count and store the byte, repeating while
 * count is non-zero.
 * NOTE(review): the literal -12 displacement presumably targets the
 * ldbs just above the addib (displacement relative to addib + 8), with
 * the stbs executing in the addib's delay slot -- confirm.
 */
462 #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count)  \
463         comb,=,n        %r0, count, _LABEL(_done)               ! \
464         ldbs,ma         1(src_spc, src_off), %r1                ! \
465         addib,<>        -1, count, -12                          ! \
466         stbs,ma         %r1, 1(dst_spc, dst_off)                ! \
467         b,n             _LABEL(_done)
468 #undef _COPY_REVERSE
/*
 * Reverse byte loop: same shape as the forward one, but both offsets
 * are first advanced by count and the bytes are moved with mb/-1.
 * Unlike the forward version, the initial comb carries no ,n completer,
 * so the first add presumably still executes when the branch is taken;
 * with count == 0 that add leaves src_off unchanged.
 */
469 #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count)  \
470         comb,=          %r0, count, _LABEL(_done)               ! \
471         add             src_off, count, src_off                 ! \
472         add             dst_off, count, dst_off                 ! \
473         ldbs,mb         -1(src_spc, src_off), %r1               ! \
474         addib,<>        -1, count, -12                          ! \
475         stbs,mb         %r1, -1(dst_spc, dst_off)               ! \
476         b,n             _LABEL(_done)
477 #endif
480  * If none of the following are defined, define BCOPY.
481  */
482 #if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE))
483 #define BCOPY
484 #endif
486 #if defined(SPCOPY) && !defined(_STANDALONE)
487 #include <sys/errno.h>
488 #include "assym.h"
491  * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
492  *      size_t len)
494  * We assume that the regions do not overlap.
495  */
496 LEAF_ENTRY(spcopy)
        /*
         * Register use: %arg0 = source space, %arg1 = source offset,
         * %arg2 = destination space, %arg3 = destination offset; len is
         * fetched from the stack frame.  %r31 holds the PCB pointer and
         * %ret1 the saved %sr2 for the whole routine.
         */
498         /*
499          * Setup the fault handler, and load %ret0
500          * with EFAULT, assuming the copy will fail.
501          */
502         mfctl   CR_CURLWP, %r31
503 #ifdef  DIAGNOSTIC
        /* Call panic() with the message below if curlwp is NULL. */
504         comb,<>,n %r0, %r31, Lspcopy_curlwp_ok
505         ldil    L%panic, %r1
506         ldil    L%Lspcopy_curlwp_bad, %arg0
507         ldo     R%panic(%r1), %r1
508         ldo     R%Lspcopy_curlwp_bad(%arg0), %arg0
509         .call
510         bv,n    %r0(%r1)
511         nop
512 Lspcopy_curlwp_bad:
513         .asciz  "spcopy: curlwp == NULL\n"
514         .align  8
515 Lspcopy_curlwp_ok:
516 #endif /* DIAGNOSTIC */
        /* %r31 = curlwp's PCB; store &spcopy_fault in its onfault slot. */
517         ldil    L%spcopy_fault, %r1
518         ldw     L_PCB(%r31), %r31
519         ldo     R%spcopy_fault(%r1), %r1
520         ldi     EFAULT, %ret0
521         stw     %r1, PCB_ONFAULT(%r31)
523         /* Setup the space registers, saving the old %sr2 in %ret1. */
524         mfsp    %sr2, %ret1
525         mtsp    %arg0, %sr1
526         mtsp    %arg2, %sr2
528         /* Get the len argument (reusing %arg0) and do the copy. */
529         ldw     HPPA_FRAME_ARG(4)(%sp), %arg0
530 #define _LABEL(l) __CONCAT(spcopy,l)
531         _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0)
532 _LABEL(_done):
534         /* Success: return 0 and fall through to the common exit. */
535         copy    %r0, %ret0
        /*
         * Common exit: clear the onfault slot, restore %sr2 and return.
         * NOTE(review): presumably the trap handler resumes here after a
         * fault during the copy, with %ret0 still holding EFAULT --
         * confirm against the trap code.
         */
536 ALTENTRY(spcopy_fault)
537         stw     %r0, PCB_ONFAULT(%r31)
538         bv      %r0(%rp)
539         mtsp    %ret1, %sr2
540 EXIT(spcopy)
541 #endif /* SPCOPY && !_STANDALONE */
543 #ifdef MEMCPY
545  * void *memcpy(void *restrict dst, const void *restrict src, size_t len);
547  * memcpy is specifically restricted to working on
548  * non-overlapping regions, so we can just copy forward.
549  */
550 LEAF_ENTRY(memcpy)
        /* %arg0 = dst, %arg1 = src, %arg2 = len.  Return dst in %ret0. */
551         copy    %arg0, %ret0
        /* Every label emitted by the copy macro gets a "memcpy" prefix. */
552 #define _LABEL(l) __CONCAT(memcpy,l)
        /* Source is (%sr0, %arg1), destination (%sr0, %arg0), count %arg2. */
553         _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
554 _LABEL(_done):
        /* The copy macro branches here when it is finished. */
555         bv,n    %r0(%rp)
556         nop
557 EXIT(memcpy)
558 #endif /* MEMCPY */
560 #ifdef BCOPY
562  * void bcopy(const void *src, void *dst, size_t len);
563  */
564 LEAF_ENTRY(bcopy)
        /*
         * bcopy takes (src, dst, len).  Swap %arg0 and %arg1 through %r1
         * so that, as in memmove, %arg0 = dst and %arg1 = src for the
         * shared body below.
         */
565         copy    %arg0, %r1
566         copy    %arg1, %arg0
567         copy    %r1, %arg1
568         /* FALLTHROUGH */
        /* Label families for the forward and reverse macro expansions. */
569 #define _LABEL_F(l) __CONCAT(bcopy_F,l)
570 #define _LABEL_R(l) __CONCAT(bcopy_R,l)
571 #endif
573 #ifdef MEMMOVE
575  * void *memmove(void *dst, const void *src, size_t len);
576  */
577 LEAF_ENTRY(memmove)
        /* Label families for the forward and reverse macro expansions. */
578 #define _LABEL_F(l) __CONCAT(memmove_F,l)
579 #define _LABEL_R(l) __CONCAT(memmove_R,l)
        /* Return value: the original dst. */
580         copy    %arg0, %ret0
581 #endif /* MEMMOVE */
583 #if defined(BCOPY) || defined(MEMMOVE)
585         /*
586          * If src >= dst or src + len <= dst, we copy
587          * forward, else we copy in reverse.
588          *
         * %r1 = src + len.  The first comb handles src >= dst; its
         * target displacement of 0 with ,n presumably just skips the
         * second comb.  The second comb goes to the reverse copy when
         * src + len > dst.  Both comparisons are unsigned.
         */
589         add             %arg1, %arg2, %r1
590         comb,>>=,n      %arg1, %arg0, 0
591         comb,>>,n       %r1, %arg0, _LABEL_R(_go)
        /* Forward copy; its labels use the _F family. */
593 #define _LABEL _LABEL_F
594         _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
595 #undef _LABEL
597 _LABEL_R(_go):
        /* Reverse copy; its labels use the _R family. */
598 #define _LABEL _LABEL_R
599         _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2)
600 #undef _LABEL
        /* Both expansions branch to their _done label; share one return. */
602 _LABEL_F(_done):
603 _LABEL_R(_done):
604         bv,n    %r0(%rp)
605         nop
606 #ifdef BCOPY
607 EXIT(bcopy)
608 #else
609 EXIT(memmove)
610 #endif
611 #endif /* BCOPY || MEMMOVE */