/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */
#include <linux/linkage.h>
#include <asm/cache.h>	/* L1_CACHE_SHIFT */
/*
 * The memset implementation below is optimized to use the prefetchw and
 * prealloc instructions on CPUs with a 64B L1 data cache line
 * (L1_CACHE_SHIFT == 6).
 * If you want to implement an optimized memset for the other possible L1
 * data cache line lengths (32B and 128B), rewrite the code carefully,
 * checking that no prefetchw/prealloc instruction is issued for an L1 cache
 * line that does not belong to the memset area.
 */
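
/*
 * Illustrative sketch (comment only, nothing here is assembled): the main
 * loop walks the destination one 64B cache line at a time and preallocates
 * the next line before filling the current one, roughly
 *
 *	while (bytes_left >= 64) {
 *		prealloc dst + 64        (claim the next line without fetching it)
 *		store 64 bytes of the replicated fill pattern at dst
 *		dst += 64, bytes_left -= 64
 *	}
 *
 * The real loop bounds are trimmed so that the final prealloc never reaches
 * past the area being set, which is exactly the constraint described above.
 */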
#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif
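
/*
 * Note: for L1 line sizes other than 64B the two macros above expand to
 * nothing, so the code below still produces a correct memset; it just runs
 * without the prefetch/prealloc hints.
 */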
	mov	r3, r0		; don't clobber ret val

	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
	brls.d.nt	r2, 8, .Lsmallchunk	; len <= 8: finish with simple byte stores

	lpnz	@.Laligndestination		; byte-store until the destination is word aligned
;;; Destination is aligned: replicate the fill byte across a full register
;;; (and a register pair when 64-bit stores are available)
;;; Convert len to Dwords, unfold x8
	lsr.f	lp_count, lp_count, 6

	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching
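
	; Each iteration then fills the 64B line with the replicated pattern:
	; 8-byte double stores (std.ab) when LL64 is available, plain 4-byte
	; stores otherwise.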
#ifdef CONFIG_ARC_HAS_LL64
	lsr.f	lp_count, r2, 5		; Last remaining max 124 bytes, in 32B chunks
#ifdef CONFIG_ARC_HAS_LL64

	and.f	lp_count, r2, 0x1F	; Last remaining 31 bytes
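
	; The final tail (at most 31 bytes here, or the whole request when
	; len <= 8 branched to .Lsmallchunk) is written one byte at a time.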
; adjust bzero args (r0 = ptr, r1 = len) to memset args (r0 = ptr, r1 = 0, r2 = len)
	mov	r2, r1		; len becomes memset's third argument
	b.d	memset		; tail call: a plain branch leaves blink intact,
	mov	r1, 0		; so memset returns straight to our caller; fill byte set to 0 in the delay slot
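
	; Design note: tail-calling memset avoids duplicating the cache-line
	; optimized store loop just for the zero-fill case.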