1 /* $Id: VISmemset.S,v 1.9 1999/05/25 16:53:01 jj Exp $
2 * VISmemset.S: High speed memset operations utilizing the UltraSparc
3 * Visual Instruction Set.
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */
/*
 * SET_BLOCKS(base, offset, source)
 *
 * Expands to four stx (64-bit) stores of `source` at
 * [base - offset - 0x18], [base - offset - 0x10], [base - offset - 0x08]
 * and [base - offset - 0x00], i.e. the 32 consecutive bytes that start
 * at base - offset - 0x18.
 *
 *   base   - address register the offsets are taken from
 *   offset - constant distance subtracted from base
 *   source - register whose value is stored
 *
 * The statements are joined with ';' so one invocation expands to four
 * instructions on a single logical line.
 */
12 #define SET_BLOCKS(base, offset, source) \
13 stx source, [base - offset - 0x18]; \
14 stx source, [base - offset - 0x10]; \
15 stx source, [base - offset - 0x08]; \
16 stx source, [base - offset - 0x00];
/*
 * SET_BLOCKS(base, offset, source) -- 32-bit store variant
 *
 * Expands to eight stw (32-bit) stores of `source` at 4-byte steps from
 * [base - offset - 0x18] up to [base - offset + 0x04], i.e. the 32
 * consecutive bytes that start at base - offset - 0x18.
 *
 *   base   - address register the offsets are taken from
 *   offset - constant distance subtracted from base
 *   source - register whose low 32 bits are stored by each stw
 *
 * NOTE(review): this is a second definition of SET_BLOCKS and no
 * preprocessor conditional is visible between the two; presumably an
 * ifdef/else selects exactly one of them -- confirm.
 */
18 #define SET_BLOCKS(base, offset, source) \
19 stw source, [base - offset - 0x18]; \
20 stw source, [base - offset - 0x14]; \
21 stw source, [base - offset - 0x10]; \
22 stw source, [base - offset - 0x0c]; \
23 stw source, [base - offset - 0x08]; \
24 stw source, [base - offset - 0x04]; \
25 stw source, [base - offset - 0x00]; \
26 stw source, [base - offset + 0x04];
30 /* So that the brz,a,pt in memset doesn't have to go through the PLT, here we go... */
36 #include <asm/visasm.h>
/*
 * RETL: expands to a single instruction that copies %g3 into %o0.
 * NOTE(review): presumably used to set the return value in %o0 on the
 * exit paths; the use sites are not visible here -- confirm.
 */
40 #define RETL mov %g3, %o0
43 /* Well, memset is a lot easier to get right than bcopy... */
53 brz,a,pt %o1, bzero_private
108 1: andcc %o5, 16, %g0
121 1: andcc %o5, 32, %g0
123 andncc %o2, 0x3f, %o3
147 wr %g0, ASI_BLK_P, %asi
148 membar #StoreStore | #LoadStore
162 stda %f0, [%o0 + 0x00] %asi
165 stda %f0, [%o0 + 0x40] %asi
166 stda %f0, [%o0 + 0x80] %asi
169 10: stda %f0, [%o0 + 0x00] %asi
170 stda %f0, [%o0 + 0x40] %asi
171 stda %f0, [%o0 + 0x80] %asi
172 stda %f0, [%o0 + 0xc0] %asi
173 11: subcc %o3, 256, %o3
182 wr %g0, FPRS_FEF, %fprs
185 membar #StoreLoad | #StoreStore
186 9: andcc %o2, 0x78, %g5
193 jmpl %o4 + %lo(13f), %g0
203 jmpl %o4 + (13f - 14b), %g0
206 12: SET_BLOCKS(%o0, 0x68, %o1)
207 SET_BLOCKS(%o0, 0x48, %o1)
208 SET_BLOCKS(%o0, 0x28, %o1)
209 SET_BLOCKS(%o0, 0x08, %o1)
238 andncc %o2, 0x3f, %o3