2 (C) Copyright 2001,2006,
3 International Business Machines Corporation,
4 Sony Computer Entertainment, Incorporated,
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are met:
12 * Redistributions of source code must retain the above copyright notice,
13 this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 * Neither the names of the copyright holders nor the names of their
18 contributors may be used to endorse or promote products derived from this
19 software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 POSSIBILITY OF SUCH DAMAGE.
33 #include <spu_intrinsics.h>
35 #include "vec_literal.h"
37 /* Copy n bytes from memory area src to memory area dest.
38 * Copying is performed as if the n characters pointed to
39 * by src are first copied into a temporary array that does
40 * not overlap the src and dest arrays. Then the n characters
41 * of the temporary array are copied into the destination
42 * array. The memmove subroutine returns a pointer to dest.
45 void * memmove(void * __restrict__ dest
, const void * __restrict__ src
, size_t n
)
48 unsigned int soffset1
, soffset2
, doffset1
, doffset2
;
49 vec_uchar16
*vSrc
, *vDst
;
50 vec_uchar16 sdata1
, sdata2
, sdata
, ddata
, shuffle
;
51 vec_uchar16 mask
, mask1
, mask2
, mask3
, one
= spu_splats((unsigned char)-1);
53 soffset1
= (unsigned int)(src
) & 15;
54 doffset1
= (unsigned int)(dest
) & 15;
55 doffset2
= ((unsigned int)(dest
) + n
) & 15;
57 /* Construct a series of masks used for data insertion. The masks
58 * contain a 0 bit where the destination word is unchanged, and a 1
59 * bit where it must be replaced by source bits.
61 * mask1 = mask for leading unchanged bytes
62 * mask2 = mask for trailing unchanged bytes
63 * mask3 = mask indicating that more than one qword is being changed.
66 mask1
= spu_rlmaskqwbyte(mask
, -doffset1
);
67 mask2
= spu_slqwbyte(mask
, 16-doffset2
);
68 mask3
= (vec_uchar16
)spu_cmpgt(spu_splats((unsigned int)(doffset1
+ n
)), 15);
70 vDst
= (vec_uchar16
*)(dest
);
72 delta
= (int)soffset1
- (int)doffset1
;
74 /* The following check only works if the SPU addresses are not
75 * wrapped. No provisions have been made to correct for this
78 if (((unsigned int)dest
- (unsigned int)src
) >= (unsigned int)n
) {
79 /* Forward copy. Perform a memcpy.
81 * Handle any leading destination partial quadwords as
82 * well as a very short copy (i.e., such that the n characters
83 * all reside in a single (destination) quadword).
85 vSrc
= (vec_uchar16
*)(src
);
86 vDst
= (vec_uchar16
*)(dest
);
88 /* Handle any leading destination partial quadwords as
89 * well as a very short copy (i.e., such that the n characters
90 * all reside in a single (destination) quadword).
92 soffset1
= (unsigned int)(src
) & 15;
93 doffset1
= (unsigned int)(dest
) & 15;
94 doffset2
= ((unsigned int)(dest
) + n
) & 15;
96 /* Compute a shuffle pattern used to align the source string
97 * with the alignment of the destination string.
100 adjust
= (int)spu_extract(spu_cmpgt(spu_promote(doffset1
, 0), spu_promote(soffset1
, 0)), 0);
101 delta
= (int)soffset1
- (int)doffset1
;
102 delta
+= adjust
& 16;
104 shuffle
= (vec_uchar16
)spu_add((vec_uint4
)spu_splats((unsigned char)delta
),
105 VEC_LITERAL(vec_uint4
, 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F));
113 sdata
= spu_shuffle(sdata1
, sdata2
, shuffle
);
115 /* Construct a series of masks used for data insertion. The masks
116 * contain 0 where the destination word is unchanged, and 1 where it
117 * must be replaced by source bytes.
119 * mask1 = mask for leading unchanged bytes
120 * mask2 = mask for trailing unchanged bytes
121 * mask3 = mask indicating that more than one qword is being changed.
124 mask1
= spu_rlmaskqwbyte(mask
, -doffset1
);
125 mask2
= spu_slqwbyte(mask
, 16-doffset2
);
126 mask3
= (vec_uchar16
)spu_cmpgt(spu_splats((unsigned int)(doffset1
+ n
)), 15);
128 *vDst
++ = spu_sel(ddata
, sdata
, spu_and(mask1
, spu_or(mask2
, mask3
)));
132 /* Handle complete destination quadwords
137 *vDst
++ = spu_shuffle(sdata1
, sdata2
, shuffle
);
141 /* Handle any trailing partial (destination) quadwords
143 mask
= spu_and((vec_uchar16
)spu_cmpgt(spu_splats((unsigned int)n
), 16), mask2
);
144 *vDst
= spu_sel(*vDst
, spu_shuffle(sdata2
, *vSrc
, shuffle
), mask
);
149 * Handle any leading destination partial quadwords as
150 * well as a very short copy (i.e., such that the n characters
151 * all reside in a single (destination) quadword).
153 vSrc
= (vec_uchar16
*)((unsigned int)src
+ n
-1);
154 vDst
= (vec_uchar16
*)((unsigned int)dest
+ n
-1);
156 /* Handle any leading destination partial quadwords as
157 * well as a very short copy (i.e., such that the n characters
158 * all reside in a single (destination) quadword).
160 soffset1
= (unsigned int)(src
) & 15;
161 soffset2
= (unsigned int)(vSrc
) & 15;
162 doffset1
= (unsigned int)(dest
) & 15;
163 doffset2
= (unsigned int)(vDst
) & 15;
165 /* Compute a shuffle pattern used to align the source string
166 * with the alignment of the destination string.
168 adjust
= (int)spu_extract(spu_cmpgt(spu_promote(soffset2
, 0), spu_promote(doffset2
, 0)), 0);
169 delta
= (int)doffset2
- (int)soffset2
;
170 delta
+= adjust
& 16;
172 shuffle
= (vec_uchar16
)spu_sub(VEC_LITERAL(vec_uint4
, 0x10111213, 0x14151617, 0x18191A1B, 0x1C1D1E1F),
173 (vec_uint4
)spu_splats((unsigned char)delta
));
181 sdata
= spu_shuffle(sdata1
, sdata2
, shuffle
);
183 /* Construct a series of masks used for data insertion. The masks
184 * contain 0 where the destination word is unchanged, and 1 where it
185 * must be replaced by source bytes.
187 * mask1 = mask for leading unchanged bytes
188 * mask2 = mask for trailing unchanged bytes
189 * mask3 = mask indicating that more than one qword is being changed.
192 mask1
= spu_rlmaskqwbyte(mask
, -doffset1
);
193 mask2
= spu_slqwbyte(mask
, 15-doffset2
);
194 mask3
= (vec_uchar16
)spu_cmpgt(spu_splats((int)(doffset2
- n
)), -2);
196 *vDst
-- = spu_sel(ddata
, sdata
, spu_and(mask2
, spu_orc(mask1
, mask3
)));
200 /* Handle complete destination quadwords
202 while ((int)n
> 15) {
205 *vDst
-- = spu_shuffle(sdata1
, sdata2
, shuffle
);
209 /* Handle any trailing partial (destination) quadwords
211 mask
= spu_and((vec_uchar16
)spu_cmpgt(spu_splats((int)n
), 0), mask1
);
212 *vDst
= spu_sel(*vDst
, spu_shuffle(*vSrc
, sdata1
, shuffle
), mask
);