1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
6 #include <linux/linkage.h>
10 * The memset implementation below is optimized to use prefetchw and prealloc
11 * instructions in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6)
12 * If you want to implement optimized memset for other possible L1 data cache
13 * line lengths (32B and 128B) you should rewrite code carefully checking
14 * we don't call any prefetchw/prealloc instruction for L1 cache lines which
15 * don't belong to the memset area.
; NOTE(review): this excerpt is non-contiguous -- the macros' .endm
; terminators and the #else/#endif of this conditional fall on lines
; that are not visible here.
18 #if L1_CACHE_SHIFT == 6
;
; 64B L1 data cache line: emit the real cache-line hint instructions.
;
; PREALLOC_INSTR reg, off -- allocate the cache line containing
; [\reg, \off] without fetching its contents from memory (body not
; visible in this excerpt; presumably a `prealloc` instruction).
20 .macro PREALLOC_INSTR reg, off
;
; PREFETCHW_INSTR reg, off -- prefetch the cache line containing
; [\reg, \off] with intent to write.
24 .macro PREFETCHW_INSTR reg, off
25 prefetchw [\reg, \off]
;
; Other cache-line sizes: the same macros are redefined -- presumably
; as no-ops, so the call sites below need no conditionals (bodies not
; visible in this excerpt -- TODO confirm against full file).
30 .macro PREALLOC_INSTR reg, off
33 .macro PREFETCHW_INSTR reg, off
; memset main body (fragmentary excerpt -- most instructions, including
; the loop bodies and the ENTRY/END markers, are on lines not shown).
; ARC calling convention: r0 = dst (also the return value), r2 = len;
; presumably r1 = fill byte per memset(dst, c, n) -- confirm in full file.
39 PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
; Work on a copy of dst so r0 survives as the return value.
43 mov r3, r0 ; don't clobber ret val
; len <= 8: skip alignment/bulk paths (delay-slot branch, not-taken hint).
46 brls.d.nt r2, 8, .Lsmallchunk
; Zero-overhead loop aligning the destination pointer (body not shown).
51 lpnz @.Laligndestination
57 ;;; Destination is aligned
71 ;;; Convert len to Dwords, unfold x8
; lp_count = lp_count >> 6, setting flags (each iteration covers 64B).
72 lsr.f lp_count, lp_count, 6
; Hint-allocate the next 64B line before it is stored to, avoiding a
; useless fetch of data that is about to be fully overwritten.
76 PREALLOC_INSTR r3, 64 ; alloc next line w/o fetching
; With LL64 the core has 64-bit load/store pairs -- presumably the
; store loop uses `std` here (loop body not visible in this excerpt).
78 #ifdef CONFIG_ARC_HAS_LL64
; Tail handling: remaining length in 32B chunks, flags set for skip.
107 lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
110 #ifdef CONFIG_ARC_HAS_LL64
; Final sub-32B remainder, handled byte/word-wise (loop not shown).
127 and.f lp_count, r2, 0x1F ;Last remaining 31 bytes
; bzero(dst, len) forwards to memset(dst, 0, len).
; NOTE(review): the instructions that shuffle the arguments (moving len
; into r2 and zeroing the fill value) are on lines not shown here --
; presumably one of them sits in the branch's delay slot.
139 ; adjust bzero args to memset args
; Plain `b.d` (not `bl`) leaves blink untouched, so memset returns
; directly to bzero's caller -- a true tail call.
141 b.d memset ;tail call so need to tinker with blink