1 /*******************************************************************************
3 * Copyright (c) 1993 Intel Corporation
5 * Intel hereby grants you permission to copy, modify, and distribute this
6 * software and its documentation. Intel grants this permission provided
7 * that the above copyright notice appears in all copies and that both the
8 * copyright notice and this permission notice appear in supporting
9 * documentation. In addition, Intel grants this permission provided that
10 * you prominently mark as "not part of the original" any modifications
11 * made to this software or documentation, and that the name of Intel
12 * Corporation not be used in advertising or publicity pertaining to
13 * distribution of the software or the documentation without specific,
14 * written prior permission.
16 * Intel Corporation provides this AS IS, WITHOUT ANY WARRANTY, EXPRESS OR
17 * IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTY OF MERCHANTABILITY
18 * OR FITNESS FOR A PARTICULAR PURPOSE. Intel makes no guarantee or
19 * representations regarding the use of, or the results of the use of,
20 * the software and documentation in terms of correctness, accuracy,
21 * reliability, currentness, or otherwise; and you rely on the software,
22 * documentation and results solely at your own risk.
24 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY LOSS OF USE, LOSS OF BUSINESS,
25 * LOSS OF PROFITS, INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES
26 * OF ANY KIND. IN NO EVENT SHALL INTEL'S TOTAL LIABILITY EXCEED THE SUM
27 * PAID TO INTEL FOR THE PRODUCT LICENSED HEREUNDER.
29 ******************************************************************************/
39 * (c) copyright 1988,1992,1993 Intel Corp., all rights reserved
43 procedure memmove (optimized assembler version for the CA)
44 procedure memcpy (optimized assembler version for the CA)
46 dest_addr = memmove (dest_addr, src_addr, len)
47 dest_addr = memcpy (dest_addr, src_addr, len)
49 copy len bytes pointed to by src_addr to the space pointed to by
50 dest_addr. Return the original dest_addr.
52 Memcpy will fail if the source and destination areas overlap
53 (in particular, if the end of the source is overlapped by the
54 beginning of the destination). The behavior is undefined.
55 This is acceptable according to the draft C standard.
56 Memmove will not fail if overlap exists.
58 Undefined behavior will also occur if the end of the source area
59 (i.e. its last byte) is in the last word of the program's
60 allocated memory space. This is so because, in several cases, the
61 routine will fetch ahead one word. Disallowing the fetch ahead would
62 impose a severe performance penalty.
64 This program handles five cases:
66 1) both arguments start on a word boundary
67 2) neither is word aligned, but they are offset by the same amount
68 3) source is word aligned, destination is not
69 4) destination is word aligned, source is not
70 5) neither is word aligned, and they are offset by differing amounts
72 At the time of this writing, only g0 thru g7 and g13 are available
73 for use in this leafproc; other registers would have to be saved and
74 restored. These nine registers, plus tricky use of g14 are sufficient
75 to implement the routine. The registers are used as follows:
77 g0 dest ptr; not modified, so that it may be returned
78 g1 src ptr; shift count
79 g2 len (number of bytes still to be moved)
80 g3 src ptr (word aligned)
81 g4 dest ptr (word aligned)
82 g5 -4 for Lbackwards move
83 Little endian:
84 g6 lsw of double word for extraction of 4 bytes
85 g7 msw of double word for extraction of 4 bytes
86 Big endian:
87 g6 msw of double word for extraction of 4 bytes
88 g7 lsw of double word for extraction of 4 bytes
/*
 * Byte-order conditional followed by the symbol declarations.  Both the
 * single-underscore and the double-underscore names of memmove and
 * memcpy are exported, and each pair of names is declared as one leaf
 * procedure.
 * NOTE(review): the lines guarded by this #if, and its #endif, are not
 * visible in this copy; presumably they bind LSW/MSW to g6/g7 for the
 * selected byte order.  Confirm against the complete file.
 */
93 #if __i960_BIG_ENDIAN__
101 .globl _memmove, _memcpy
102 .globl __memmove, __memcpy
103 .leafproc _memmove, __memmove
104 .leafproc _memcpy, __memcpy
/*
 * Common entry for memmove/memcpy: g0 = dest, g1 = src, g2 = len.
 * Leaves at once when len <= 0 or when dest == src.  Otherwise it picks
 * the forward copy (fall through) or the backward copy (Lbackwards,
 * taken when dest starts inside the source area), and for the forward
 * copy tests whether src is word aligned.
 */
111 lda Lrett-(.+8)(ip),g14 # g14 = ip-relative address of Lrett (copied to g13 below)
115 cmpibge.f 0,g2,Lquick_exit # Lexit if number of bytes to move is <= zero.
116 cmpo g0,g1 # if dest starts earlier than src ...
117 lda (g14),g13 # preserve return address
118 addo g2,g1,g5 # compute addr of byte after last byte of src
119 be.f Lexit_code # no move necessary if src and dest are same
120 concmpo g5,g0 # ... or if dest starts after end of src ...
121 notand g1,3,g3 # extract word addr of start of src
122 bg.f Lbackwards # ... then drop thru, else do move backwards
123 cmpo g3,g1 # check alignment of src
124 ld (g3),LSW # fetch word containing at least first byte
125 notand g0,3,g4 # extract word addr of start of dest
126 lda 4(g3),g3 # advance src word addr
127 bne.f Lcase_245 # branch if src is NOT word aligned
/*
 * Forward copy, src word aligned.  If dest is word aligned too, go to
 * Lcase_1.  Otherwise (Lcase_3) the leading bytes of dest are stored one
 * at a time until the dest byte pointer g1 reaches the next word
 * boundary g4; g14 tracks the resulting shift count for the word loop
 * that follows.
 * NOTE(review): no #else/#endif is visible for the #if inside the byte
 * loop.  The rotate/subo/stob group appears to be the big-endian
 * variant and the addo/stob/shro group its little-endian counterpart.
 * Confirm against the complete file.
 */
130 cmpo g0,g4 # check alignment of dest
131 subo 4,g4,g4 # store is pre-incrementing; back up dest addr
132 be.t Lcase_1 # branch if dest word aligned
134 Lcase_3: # src is word aligned; dest is not
135 addo 8,g4,g4 # move dest word ptr to first word boundary
136 lda (g0),g1 # copy dest byte ptr
137 mov LSW,MSW # make copy of first word of src
138 lda 32,g14 # initialize shift count to zero (mod 32)
141 Lcase_3_cloop_at_start: # character copying loop for start of dest str
142 cmpdeci 0,g2,g2 # is max_bytes exhausted? (g2 is also counted down by one)
143 be.f Lexit_code # Lexit if max_bytes is exhausted
144 #if __i960_BIG_ENDIAN__
145 rotate 8,MSW,MSW # move next byte into position for extraction
146 subo 8,g14,g14 # step the shift counter down by 8 bits
147 stob MSW,(g1) # store the byte in dest
149 addo 8,g14,g14 # step the shift counter up by 8 bits
150 stob MSW,(g1) # store the byte in dest
151 shro 8,MSW,MSW # move next byte into position for extraction
153 lda 1(g1),g1 # post-increment dest ptr
154 cmpobne.t g1,g4,Lcase_3_cloop_at_start # loop until dest ptr reaches the word boundary
156 ld (g3),MSW # fetch msw of operand for double shift
/*
 * Forward word copy for the case where the source bytes of one dest
 * word straddle two source words.  eshro shifts the g6/g7 double word
 * right by the count in g14 to extract four source bytes into g1, which
 * are then stored as one word at g4.  The following source word is
 * fetched ahead of need.  With fewer than four bytes left, control goes
 * to the byte loop Lcase_3_cloop.
 * NOTE(review): this reads like the body of a loop, but neither its
 * label nor a closing branch is visible in this copy.  Confirm against
 * the complete file.
 */
160 cmpi g2,4 # less than four bytes to move?
161 lda 4(g3),g3 # post-increment src word addr
162 eshro g14,g6,g1 # extract 4 bytes of src
163 bl.f Lcase_3_cloop # branch if < four bytes left to move
164 mov MSW,LSW # move msw to lsw
165 ld (g3),MSW # pre-fetch msw of operand for double shift
166 subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved
167 st g1,(g4) # store 4 bytes to dest
168 addo 4,g4,g4 # post-increment dest ptr
/*
 * Forward copy, src and dest both word aligned.  Entered at Lcase_1
 * with the current source word already in LSW.  While at least four
 * bytes remain, each pass keeps that word in g1, fetches the following
 * source word ahead of need and stores g1 at the pre-incremented dest
 * address.  The look-ahead load is the "fetch ahead one word" mentioned
 * in the file header.
 */
171 Lcase_1_wloop: # word copying loop
172 subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved
173 ld (g3),LSW # pre-fetch next word of src
174 addo 4,g3,g3 # post-increment src addr
175 st g1,(g4) # store word in dest
176 Lcase_1: # src and dest are word aligned
177 cmpi g2,4 # check for fewer than four bytes to move
178 addo 4,g4,g4 # pre-increment dest addr
179 lda (LSW),g1 # keep a copy of the src word
180 bge.t Lcase_1_wloop # branch if at least four bytes to copy
/*
 * Tail of the forward copy.  The remaining (fewer than four) bytes are
 * taken from g1 and stored one at a time at g4.  The routine then
 * clears g14 and returns through g13 with g0 still holding dest.
 * NOTE(review): the Lcase_1_cloop and Lexit_code labels, the #endif
 * lines, and the instructions that count g2 down and set the condition
 * code tested by the bne are not visible in this copy.  Confirm
 * against the complete file.
 */
182 cmpibe.f 0,g2,Lexit_code # Lexit if max_bytes is exhausted
185 #if __i960_BIG_ENDIAN__
186 rotate 8,g1,g1 # move next byte into position for extraction
189 stob g1,(g4) # store the byte in dest
191 lda 1(g4),g4 # post-increment dest byte addr
192 #if ! __i960_BIG_ENDIAN__
193 shro 8,g1,g1 # move next byte into position for extraction
195 bne.t Lcase_1_cloop # loop while bytes remain; fall through once max_bytes is exhausted
198 mov 0,g14 # conform to register conventions
199 bx (g13) # return via saved address; g0 = addr of dest; g14 = 0
/*
 * Forward copy, src NOT word aligned (target of the Lcase_245 branch).
 * Checks the dest alignment, fetches the second source word into MSW
 * and takes the low two bits of the src address as the basis of the
 * shift count.  A word-aligned dest goes to Lcase_4.  Otherwise the src
 * byte offset is merged with the dest word address and compared with
 * dest to see which of the two starts earlier within its word, and four
 * source bytes are extracted into g5.
 * NOTE(review): parts of this section are not visible in this copy: its
 * label, the instruction that puts the shift count into g14 before it
 * is used here, the #endif lines and whatever acts on the byte-offset
 * comparison.  Confirm against the complete file.
 */
205 cmpo g0,g4 # check alignment of dest
206 ld (g3),MSW # pre-fetch second half
207 and 3,g1,g1 # byte offset of src within its word (basis of the shift count)
209 #if __i960_BIG_ENDIAN__
210 subo g14,0,g14 # negate the shift count for big endian
212 be.t Lcase_4 # branch if dest is word aligned
213 or g4,g1,g1 # is src earlier in word, later, or sync w/ dst
214 cmpo g0,g1 # < indicates first word of dest has more bytes
215 lda 4(g4),g4 # move dest word addr to first word boundary
216 eshro g14,g6,g5 # extract four bytes
218 #if __i960_BIG_ENDIAN__
224 lda 4(g3),g3 # move src word addr to second word boundary
/*
 * Setup for the backward (descending) copy; presumably the Lbackwards
 * target, although that label is not visible in this copy.  On entry
 * g5 = address of the byte after the end of src.  MSW temporarily holds
 * that address rounded down to a word boundary, g3 is set one word
 * below it, g1 becomes the address of the byte after the end of dest
 * and g4 is g1 rounded down to a word boundary.  The final branch
 * separates the case where the end of src is not word aligned.
 */
231 notand g5,3,MSW # extract word addr of byte after end of src
232 cmpo MSW,g5 # check alignment of end of src
233 subo 4,MSW,g3 # retreat src word addr
234 addo g2,g0,g1 # compute addr of byte after end of dest
235 notand g1,3,g4 # extract word addr of byte after end of dest
236 bne.f Lcase.245 # branch if end of src is NOT word aligned
/*
 * Backward copy, end of src word aligned.  If the end of dest is word
 * aligned too, go to Lcase.1.  Otherwise (Lcase.3) the trailing bytes of
 * dest are stored one at a time, from the end downwards, until the dest
 * byte pointer g1 reaches the word boundary g4; g14 tracks the shift
 * count for the word loop that follows.  The loop-closing bne tests the
 * result of the cmpo g1,g4 issued before the store.
 * NOTE(review): no #else/#endif is visible for the two #if lines in the
 * byte loop.  The shro/addo pair appears to be the big-endian variant
 * and the lone subo its little-endian counterpart.  Confirm against
 * the complete file.
 */
239 cmpo g1,g4 # check alignment of end of dest
240 ld (g3),MSW # fetch last word of src
241 subo 4,g3,g3 # retreat src word addr
242 be.t Lcase.1 # branch if dest word aligned
244 Lcase.3: # src is word aligned; dest is not
245 mov MSW,LSW # make copy of last word of src
246 lda 32,g14 # initialize shift count to zero (mod 32)
249 Lcase.3_cloop_at_start: # character copying loop for end of dest
250 cmpdeci 0,g2,g2 # is max_bytes exhausted? (g2 is also counted down by one)
251 be.f Lexit_code # Lexit if max_bytes is exhausted
252 #if ! __i960_BIG_ENDIAN__
253 rotate 8,LSW,LSW # move next byte into position for storing
255 lda -1(g1),g1 # pre-decrement dest ptr
256 cmpo g1,g4 # have we reached word boundary in dest yet?
257 stob LSW,(g1) # store the byte in dest
258 #if __i960_BIG_ENDIAN__
259 shro 8,LSW,LSW # move next byte into position for storing
260 addo 8,g14,g14 # step the shift counter up by 8 bits
262 subo 8,g14,g14 # step the shift counter down by 8 bits
264 bne.t Lcase.3_cloop_at_start # loop until dest ptr reaches the word boundary
266 ld (g3),LSW # fetch lsw of operand for double shift
/*
 * Backward word copy for a dest whose words straddle two source words.
 * Each pass steps the src word pointer down, forms four output bytes in
 * g1, moves LSW up into MSW and steps the dest pointer down.  If at
 * least four bytes remain it then fetches the next lower source word
 * and stores g1; otherwise control goes to Lcase.3_cloop.
 * Two copies of the body are visible.  The first follows the big-endian
 * test of g14 against zero and takes the word straight from MSW without
 * shifting.  The second extracts it from the g6/g7 double word with
 * eshro by the count in g14.
 * NOTE(review): the labels, loop-closing branches and #endif belonging
 * to these two copies are not visible here.  Confirm against the
 * complete file.
 */
268 #if __i960_BIG_ENDIAN__
269 cmpobne 0,g14,Lcase.3_wloop # nonzero shift count: use the eshro version
271 cmpi g2,4 # less than four bytes to move?
272 lda -4(g3),g3 # post-decrement src word addr
273 mov MSW,g1 # take the 4 bytes of src directly (no shift)
274 lda (LSW),MSW # move lsw to msw
275 subo 4,g4,g4 # pre-decrement dest ptr
276 bl.f Lcase.3_cloop # branch if < four bytes left to move
277 ld (g3),LSW # pre-fetch lsw of operand for double shift
278 subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved
279 st g1,(g4) # store 4 bytes to dest
285 cmpi g2,4 # less than four bytes to move?
286 lda -4(g3),g3 # post-decrement src word addr
287 eshro g14,g6,g1 # extract 4 bytes of src
288 lda (LSW),MSW # move lsw to msw
289 subo 4,g4,g4 # pre-decrement dest ptr
290 bl.f Lcase.3_cloop # branch if < four bytes left to move
291 ld (g3),LSW # pre-fetch lsw of operand for double shift
292 subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved
293 st g1,(g4) # store 4 bytes to dest
/*
 * Backward copy, end of src and end of dest both word aligned.  Entered
 * at Lcase.1 with the current source word already in MSW.  While at
 * least four bytes remain, each pass keeps that word in g1, fetches the
 * next lower source word ahead of need and stores g1 at the
 * pre-decremented dest address.
 */
296 Lcase.1_wloop: # word copying loop
297 subi 4,g2,g2 # decrease max_byte count by the 4 bytes moved
298 ld (g3),MSW # pre-fetch next (lower) word of src
299 subo 4,g3,g3 # post-decrement src addr
300 st g1,(g4) # store word in dest
301 Lcase.1: # src and dest are word aligned
302 cmpi g2,4 # check for fewer than four bytes to move
303 subo 4,g4,g4 # pre-decrement dest addr
304 lda (MSW),g1 # keep a copy of the src word
305 bge.t Lcase.1_wloop # branch if at least four bytes to copy
/*
 * Tail of the backward copy.  The remaining (fewer than four) bytes are
 * taken from g1 and stored one at a time, moving downwards until the
 * dest byte pointer g4 equals the start of dest (g0).  The loop-closing
 * bne tests the result of the cmpi g4,g0 issued before the store.
 * NOTE(review): the Lcase.1_cloop label, the #else/#endif lines and
 * whatever follows the loop are not visible in this copy.  The shro
 * appears to be the big-endian variant and the second rotate its
 * little-endian counterpart.  Confirm against the complete file.
 */
307 cmpibe.f 0,g2,Lexit_code # Lexit if max_bytes is exhausted
308 #if ! __i960_BIG_ENDIAN__
309 rotate 8,g1,g1 # move next byte into position for storing
311 lda 4(g4),g4 # dest addr was pre-decremented by 4 too much; undo it
314 subi 1,g4,g4 # pre-decrement dest byte addr
315 cmpi g4,g0 # has dest ptr reached beginning of dest?
316 stob g1,(g4) # store the byte in dest
317 #if __i960_BIG_ENDIAN__
318 shro 8,g1,g1 # move next byte into position for storing
320 rotate 8,g1,g1 # move next byte into position for storing
322 bne.t Lcase.1_cloop # loop until dest ptr reaches the start of dest
326 cmpo g1,g4 # check alignment of dest
327 ld (MSW),MSW # pre-fetch word with at least last byte
328 and 3,g5,g5 # compute shift count
329 ld (g3),LSW # pre-fetch second to last word
331 #if __i960_BIG_ENDIAN__
332 subo g14,0,g14 # adjust shift count for big endian
334 be.t Lcase.4 # branch if dest is word aligned
335 or g4,g5,g5 # is src earlier in word, later, or sync w/ dst
336 cmpo g1,g5 # < indicates last word of dest has less bytes
337 eshro g14,g6,g5 # extract four bytes
340 #if ! __i960_BIG_ENDIAN__
343 subo 4,g3,g3 # move src word addr to second word boundary