1 /* $NetBSD: memset_arm.S,v 1.2 2013/01/14 19:15:13 matt Exp $ */
4 * Copyright (c) 2012 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matt Thomas of 3am Software Foundry.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
31 #include <machine/asm.h>
34 #define STORE8 vst1.32 {d0}, [ip:64]! /* store 8 bytes from d0, then advance ip */
35 #define STORE16 vst1.32 {d0-d1}, [ip:64]! /* store 16 bytes from d0-d1, then advance ip */
36 #define STORE32 vst1.32 {d0-d3}, [ip:64]! /* store 32 bytes from d0-d3, then advance ip */
38 #define STORE8 vstmia ip!, {d0} /* same 8-byte store using a VFP store-multiple */
39 #define STORE16 vstmia ip!, {d0-d1} /* same 16-byte store using a VFP store-multiple */
40 #define STORE32 vstmia ip!, {d0-d3} /* same 32-byte store using a VFP store-multiple */
41 #elif defined(_ARM_ARCH_DWORD_OK)
42 #define STORE8 strd r2, [ip], #8 /* store the r2/r3 pair (8 bytes), ip += 8 */
43 #define STORE16 STORE8; STORE8 /* two 8-byte stores */
44 #define STORE32 STORE16; STORE16 /* four 8-byte stores */
46 #define STORE8 stmia ip!, {r2,r3} /* store r2 and r3 (8 bytes), ip += 8 */
47 #define STORE16 STORE8; STORE8 /* two 8-byte stores */
48 #define STORE32 STORE16; STORE16 /* four 8-byte stores */
51 * memset: Sets a block of memory to the specified value
52 * Using NEON instructions
57 * r2 - number of bytes to write
62 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
64 ands r3, r1, #0xff /* r3 = fill byte; Z is set if it is zero */
65 orrne r3, r3, r3, lsl #8 /* nonzero: replicate into the low halfword */
66 orrne r3, r3, r3, lsl #16 /* nonzero: replicate into all four bytes */
67 movs r1, r2 /* r1 = length, freeing r2 to pair with r3 */
68 RETc(eq) /* return if length is 0 */
69 mov ip, r0 /* write through ip; r0 needs to stay the same */
71 cmp r1, #12 /* is this a small memset? */
72 blt .Lbyte_by_byte /* yes, go byte by byte: the alignment code below subtracts up to 7 from r1 unchecked */
74 /* Ok first we will dword align the address */
75 ands r2, ip, #7 /* grab the bottom three bits */
76 beq .Lmemset_dwordaligned /* The addr is dword aligned */
78 rsb r2, r2, #8 /* r2 = bytes until dword aligned (1..7) */
79 sub r1, r1, r2 /* subtract it from remaining length */
80 mov r2, r3 /* duplicate fill value */
82 tst ip, #1 /* halfword aligned? */
83 strneb r3, [ip], #1 /* no, write a byte */
84 tst ip, #2 /* word aligned? */
85 strneh r3, [ip], #2 /* no, write a halfword */
86 tst ip, #4 /* dword aligned? */
87 strne r3, [ip], #4 /* no, write a word */
89 /* We are now doubleword aligned */
90 .Lmemset_dwordaligned:
92 vdup.8 q0, r3 /* replicate the fill byte across q0 (d0-d1) */
93 vmov q1, q0 /* put fill in q1 (d2-d3) */
95 mov r2, r3 /* duplicate fill value */
96 vmov d0, r2, r3 /* move the r2/r3 pair into d0 */
104 blt .Lmemset_mainloop
105 ands r2, ip, #63 /* check for 64-byte alignment */
106 beq .Lmemset_mainloop /* already aligned, nothing to do here */
108 * Let's align to a 64-byte boundary so that stores don't cross
109 * cacheline boundaries. We also know we have at least 128-bytes to
110 * set so we don't have to worry about the length at the moment.
112 rsb r2, r2, #64 /* how many bytes until 64 bytes */
113 sub r1, r1, r2 /* subtract from remaining length */
114 #if !defined(NEON) && !defined(VFP)
115 mov r2, r3 /* put fill back in r2 (r2 was scratch above) */
118 tst ip, #8 /* quadword aligned? */
120 STORE8 /* no, store a dword */
121 1: tst ip, #16 /* octaword aligned? */
123 STORE16 /* no, store a quadword */
124 2: tst ip, #32 /* 64-byte aligned? */
125 beq .Lmemset_mainloop /* yes */
126 STORE32 /* no, make 64-byte aligned */
130 #if !defined(NEON) && !defined(VFP)
131 mov r2, r3 /* put fill back in r2; the integer STORE8 forms write the r2/r3 pair */
133 subs r1, r1, #64 /* subtract an initial 64 */
134 blt .Lmemset_lessthan_64bytes /* fewer than 64 bytes left, finish in the tail */
136 3: STORE32 /* store first octaword (32 bytes) */
137 STORE32 /* store second octaword (32 bytes) */
138 RETc(eq) /* return if the last subs left exactly 0 */
139 subs r1, r1, #64 /* subtract another 64 */
140 bge 3b /* and do another 64 bytes if still >= 0 */
141 .Lmemset_lessthan_64bytes: /* r1 = remaining - 64 here; its low 6 bits are the bytes still to write */
142 tst r1, #32 /* do we have 32 bytes left? */
143 beq .Lmemset_lessthan_32bytes
144 STORE32 /* yes, store an octaword */
145 bics r1, r1, #32 /* subtract 32 */
146 RETc(eq) /* return if length is 0 (NOTE(review): r1 is still negative here, so this looks unreachable; confirm) */
147 .Lmemset_lessthan_32bytes:
148 tst r1, #16 /* do we have 16 bytes left? */
149 beq .Lmemset_lessthan_16bytes
150 STORE16 /* yes, store a quadword */
151 bics r1, r1, #16 /* subtract 16 */
152 RETc(eq) /* return if length is 0 */
153 .Lmemset_lessthan_16bytes:
154 tst r1, #8 /* do we have 8 bytes left? */
155 beq .Lmemset_lessthan_8bytes/* no */
156 STORE8 /* yes, store a dword */
157 bics r1, r1, #8 /* subtract 8 */
158 RETc(eq) /* return if length is 0 */
159 .Lmemset_lessthan_8bytes: /* store from r3: r2 is only reloaded with the fill when neither NEON nor VFP is defined */
160 tst r1, #4 /* do we have a word left? */
161 strne r3, [ip], #4 /* yes, so write one */
162 tst r1, #2 /* do we have a halfword left? */
163 strneh r3, [ip], #2 /* yes, so write one */
164 tst r1, #1 /* do we have a byte left? */
165 strneb r3, [ip], #1 /* yes, so write one */
169 subs r1, r1, #1 /* count down; can we write a byte? */
170 RETc(lt) /* no, the count went negative: we're done */
171 strb r3, [ip], #1 /* yes, so store the fill byte and advance ip */
172 b .Lbyte_by_byte /* try next byte */