1 /* Copyright (C) 1996, 2000, 2003 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by David Mosberger (davidm@cs.arizona.edu).
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 /* Finds the first occurrence of a byte in a memory area (memchr). Optimized for the Alpha:
22 - memory accessed as aligned quadwords only
23 - uses cmpbge to compare 8 bytes in parallel
24 - does binary search to find 0 byte in last
25 quadword (HAKMEM needed 12 instructions to
26 do this instead of the 9 instructions that
29 For correctness consider that:
31 - only minimum number of quadwords may be accessed
32 - the third argument is an unsigned long
# Search body.  Register roles as used by the instructions below:
#   a0 = address the search starts at (the first load reads from it)
#   a1 = byte value to look for, replicated into all eight byte lanes
#   a2 = byte count on entry, reused later as an address and a remainder
#   t2 = all ones, used to build masks
#   t4 = a0 plus the cropped byte count, so one past the last byte
#   v0 = address that the byte-index search adds 4, 2 and 1 to
# NOTE(review): the trailing e0/e1 tags look like hand-scheduled
# issue-slot annotations -- confirm before reordering any instruction.
50 # Hack -- if someone passes in (size_t)-1, hoping to just
51 # search until the end of the address space, we will overflow
52 # below when we find the address of the last byte. Given
53 # that we will never have a 56-bit address space, cropping
54 # the length is the easiest way to avoid trouble.
55 zap a2, 0x80, t4 #-e0 : t4 = a2 with its top byte (bits 63:56) cleared
57 beq a2, $not_found # .. e1 : zero length, nothing to search
58 ldq_u t0, 0(a0) # e1 : load first quadword (the aligned quad containing a0)
59 insbl a1, 1, t1 # .. e0 : t1 = 000000000000ch00
60 and a1, 0xff, a1 #-e0 : a1 = 00000000000000ch
61 cmpult a2, 9, t3 # .. e1 : t3 = 1 when a2 is at most 8, else 0
62 or t1, a1, a1 # e0 : a1 = 000000000000chch
63 lda t2, -1(zero) # .. e1 : t2 = all ones
64 sll a1, 16, t1 #-e0 : t1 = 00000000chch0000
65 addq a0, t4, t4 # .. e1 : t4 = a0 + cropped length (one past the last byte)
66 or t1, a1, a1 # e1 : a1 = 00000000chchchch
68 sll a1, 32, t1 #-e0 : t1 = chchchch00000000
69 or t1, a1, a1 # e1 : a1 = chchchchchchchch
70 extql t0, a0, t6 # e0 : t6 = t0 shifted down by a0<2:0> bytes
71 beq t3, $first_quad # .. e1 : taken when more than 8 bytes are to be searched
73 ldq_u t5, -1(t4) #-e1 : eight or fewer bytes to search; load the quad holding the last byte
74 extqh t5, a0, t5 # .. e0 : high-order part of the unaligned quad at a0
76 or t6, t5, t0 # .. e1 : t0 = quadword starting at a0
78 # Deal with the case where at most 8 bytes remain to be searched
81 # t0 = ????c6c5c4c3c2c1
84 xor a1, t0, t0 # .. e1 : bytes equal to ch become zero
85 srl t2, t5, t5 # e0 : t5 = mask of a2 bits set
86 cmpbge zero, t0, t1 # .. e1 : t1 bit i is set when byte i of t0 is zero
88 beq t1, $not_found # .. e1 : no byte matched
91 # Now, determine which byte matched:
# Three-step binary search on the match mask in t1: each step adds
# 4, 2 or 1 to v0 when the tested group of mask bits is empty.
# NOTE(review): this yields the right byte index only when t1 has a
# single bit set -- presumably the lowest match bit is isolated
# before this point; confirm.
95 and t1, 0x0f, t0 #-e0 : match bit within bytes 0-3?
96 addq v0, 4, t2 # .. e1 :
97 cmoveq t0, t2, v0 # e0 : when not, v0 += 4
99 addq v0, 2, t2 # .. e1 :
100 and t1, 0x33, t0 #-e0 : match bit among mask bits 0, 1, 4, 5?
101 cmoveq t0, t2, v0 # .. e1 : when not, v0 += 2
103 and t1, 0x55, t0 # e0 : match bit among mask bits 0, 2, 4, 6?
104 addq v0, 1, t2 # .. e1 :
105 cmoveq t0, t2, v0 #-e0 : when not, v0 += 1
109 # Deal with the case where a2 > 8 bytes remain to be
110 # searched. a0 may not be aligned.
113 andnot a0, 0x7, v0 #-e1 : v0 = a0 rounded down to an 8-byte boundary
114 insqh t2, a0, t1 # .. e0 : t1 = 0000ffffffffffff (low a0<2:0> bytes are ff, shown for a0<2:0> = 6; assumes t2 is still all ones)
115 xor t0, a1, t0 # e0 : bytes equal to ch become zero
116 or t0, t1, t0 # e1 : t0 = ====ffffffffffff (bytes below a0 forced nonzero so they cannot match)
117 cmpbge zero, t0, t1 #-e0 : t1 = mask of zero bytes, meaning matches
118 bne t1, $found_it # .. e1 :
120 # At least one byte left to process.
123 subq t4, 1, a2 # .. e1 : a2 = address of the last byte to search
124 addq v0, 8, v0 #-e0 : step v0 to the next quad
126 # Make a2 point to last quad to be accessed (the
127 # last quad may or may not be partial).
129 andnot a2, 0x7, a2 # .. e1 :
130 cmpult v0, a2, t1 # e0 : t1 = 1 while v0 is below the last quad
131 beq t1, $final # .. e1 : v0 is not below the last quad
133 # At least two quads remain to be accessed.
135 subq a2, v0, t3 #-e0 : t3 <- byte distance to the last quad (8 * nr quads to be processed)
136 and t3, 8, t3 # e1 : odd number of quads?
137 bne t3, $odd_quad_count # e1 :
139 # At least three quads remain to be accessed
141 mov t0, t3 # e0 : move prefetched value to correct reg
# Two quads are checked per pass; each load fetches the quad that the
# following check will examine.
145 ldq t0, 8(v0) #-e0 : prefetch t0
146 xor a1, t3, t1 # .. e1 : check the quad held in t3
147 cmpbge zero, t1, t1 # e0 :
148 bne t1, $found_it # .. e1 :
150 addq v0, 8, v0 #-e0 :
152 xor a1, t0, t1 # .. e1 : check the quad held in t0
153 ldq t3, 8(v0) # e0 : prefetch t3
154 cmpbge zero, t1, t1 # .. e1 :
155 addq v0, 8, t5 #-e0 : t5 = address of the next quad
156 bne t1, $found_it # .. e1 :
158 cmpult t5, a2, t5 # e0 : next quad still below the last one?
159 addq v0, 8, v0 # .. e1 :
160 bne t5, $unrolled_loop #-e1 :
162 mov t3, t0 # e0 : move prefetched value into t0
163 $final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
164 bne a2, $last_quad # e1 :
# Publish the __memchr implementation under further names.
# NOTE(review): weak_alias and libc_hidden_builtin_def are macros
# defined outside this file; presumably they create a weak memchr
# alias and a library-internal hidden definition -- confirm against
# the macro definitions.
172 weak_alias (__memchr, memchr)
173 #if !__BOUNDED_POINTERS__
174 weak_alias (__memchr, __ubp_memchr)
176 libc_hidden_builtin_def (memchr)