1 /* ANSI C standard library function memset.
3 Copyright (c) 2001-2008 Tensilica Inc.
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be included
14 in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
24 #include "xtensa-asm.h"
26 /* void *memset (void *dst, int c, size_t length)
28 The algorithm is as follows:
30 Create a word with c in all byte positions.
32 If the destination is aligned, set 16B chunks with a loop, and then
33 finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
35 If the destination is unaligned, align it by conditionally
36 setting 1B and/or 2B and then go to aligned case.
38 This code tries to use fall-through branches for the common
39 case of an aligned destination (except for the branches to
40 the alignment labels). */
43 /* Byte-by-byte set. Presumably the code behind the .Lbyteset label that the unaligned paths branch to when fewer than 8 bytes remain (the label itself is not visible in this view; confirm). */
47 .align XCHAL_INST_FETCH_WIDTH // put the location counter on an instruction-fetch boundary
51 /* Skip bytes to get proper alignment for three-byte loop */
52 .skip XCHAL_INST_FETCH_WIDTH - 3 // pad WIDTH-3 bytes; if this directly follows the .align, 3 bytes remain before the next fetch boundary
59 add a6, a5, a4 // a6 = ending address (a5 + a4)
62 #if XTENSA_ESP32_PSRAM_CACHE_FIX
72 /* Destination is unaligned. Reached from memset's tests on the low two bits of the dst address; lengths below 8 are diverted to .Lbyteset instead of being aligned here. */
76 .Ldst1mod2: // dst is only byte aligned (memset branches here when bit 0 of dst is set)
78 /* Do short sizes byte-by-byte. */
79 bltui a4, 8, .Lbyteset // unsigned a4 < 8: take the byte-by-byte path
85 #if XTENSA_ESP32_PSRAM_CACHE_FIX
89 /* Now retest if dst is aligned. */
90 _bbci.l a5, 1, .Ldstaligned // bit 1 of a5 clear: branch to .Ldstaligned
92 .Ldst2mod4: // dst has 16-bit alignment (memset branches here when bit 1 of dst is set)
94 /* Do short sizes byte-by-byte. */
95 bltui a4, 8, .Lbyteset // same cutoff as in .Ldst1mod2: unsigned a4 < 8 goes byte-by-byte
101 #if XTENSA_ESP32_PSRAM_CACHE_FIX
105 /* dst is now aligned; return to main algorithm */
111 .type memset, @function // mark the memset symbol as a function
114 /* a2 = dst, a3 = c, a4 = length. a2 is kept as the return value; a5 is the working copy of dst. */
116 /* Duplicate character into all bytes of word. */
123 mov a5, a2 // copy dst so that a2 is return value
125 /* Check if dst is unaligned. The leading underscore on the branch opcodes asks the assembler not to transform them (GAS Xtensa convention). */
126 _bbsi.l a2, 0, .Ldst1mod2 // bit 0 of dst set: go to .Ldst1mod2
127 _bbsi.l a2, 1, .Ldst2mod4 // bit 1 of dst set (bit 0 was clear): go to .Ldst2mod4
130 /* Get number of loop iterations with 16B per iteration. */
133 #if XTENSA_ESP32_PSRAM_CACHE_FIX
134 // Do not do this if we have less than one iteration to do.
136 // This seems to work to prefetch the cache line.
141 /* Destination is word-aligned. */
147 add a6, a6, a5 // a6 = end of last 16B chunk (a6 + a5)
149 /* Set 16 bytes per iteration. */
155 #if !XCHAL_HAVE_LOOPS
159 /* Set any leftover pieces smaller than 16B. */
178 #if XTENSA_ESP32_PSRAM_CACHE_FIX
186 #if XTENSA_ESP32_PSRAM_CACHE_FIX
193 .size memset, . - memset // symbol size = distance from the memset label to this point