1 /* Copyright (C) 1996 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by David Mosberger (davidm@cs.arizona.edu).
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with the GNU C Library; see the file COPYING.LIB. If not,
17 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA. */
20 /* Finds characters in a memory area. Optimized for the Alpha architecture:
23 - memory accessed as aligned quadwords only
24 - uses cmpbge to compare 8 bytes in parallel
25 - does binary search to find 0 byte in last
26 quadword (HAKMEM needed 12 instructions to
27 do this instead of the 9 instructions that binary search needs).
30 For correctness consider that:
32 - only minimum number of quadwords may be accessed
33 - the third argument is an unsigned long */
/* NOTE(review): this listing is sparse -- the embedded original line
   numbers skip (52, 54, 55, 57, ... 159), so labels and instructions that
   sit between the lines shown here are not visible.  The comments below
   describe only what the visible lines do; anything that depends on an
   unseen line is marked as an assumption to confirm.

   Register roles as far as the visible lines show them:
     a0 - address of the area to search (base of the first ldq_u)
     a1 - search byte, replicated into all eight byte lanes
     a2 - bytes remaining (comments at 73/106), later a pointer to the
          last quadword to access (comment at 122)
     v0 - base address of the aligned ldq loads in the loop
     t0, t3 - quadwords being examined / prefetched  */
52 ldq_u t0, 0(a0) # load first quadword (a0 may be misaligned)
/* Replicate the low byte of a1 into every byte lane by doubling the
   filled width three times: 1 -> 2 -> 4 -> 8 bytes.  */
54 and a1, 0xff, a1 # a1 = 00000000000000ch
55 sll a1, 8, t1 # t1 = 000000000000ch00
57 or t1, a1, a1 # a1 = 000000000000chch
58 sll a1, 16, t1 # t1 = 00000000chch0000
60 or t1, a1, a1 # a1 = 00000000chchchch
61 sll a1, 32, t1 # t1 = chchchch00000000
63 or t1, a1, a1 # a1 = chchchchchchchch
/* t6 and t5 are produced by lines not visible here; presumably they are
   the two halves of an unaligned quadword extract -- TODO confirm.  */
70 or t6, t5, t0 # t0 = quadword starting at a0
73 # Deal with the case where at most 8 bytes remain to be searched
76 # t0 = ????c6c5c4c3c2c1
/* t2 and the shift count held in t5 are set on lines not visible here.  */
79 srl t2, t5, t5 # t5 = mask of a2 bits set
86 # now, determine which byte matched:
106 # Deal with the case where a2 > 8 bytes remain to be
107 # searched. a0 may not be aligned.
/* insqh positions t2 according to the low three bits of a0; the result
   comment implies t2 is all ones here (set on a line not shown -- confirm).
   ORing t1 into t0 then forces to 0xff every byte lane that is 0xff in t1;
   presumably this keeps the bytes in front of a0 from matching in a later
   compare that is not visible -- TODO confirm.  */
111 insqh t2, a0, t1 # t1 = 0000ffffffffffff (a0<0:2> ff bytes)
113 or t0, t1, t0 # t0 = ====ffffffffffff
117 /* at least one byte left to process */
/* NOTE(review): the next two lines read as the interior of a block comment
   whose opening and closing lines are not visible in this listing.  */
122 * Make a2 point to last quad to be accessed (the
123 * last quad may or may not be partial).
130 /* at least two quads remain to be accessed */
/* t3 = a2 - v0 is a byte distance between two addresses.  Masking with 8
   isolates bit 3, which is set exactly when that distance is an odd
   multiple of 8 (assuming both addresses are quadword aligned -- confirm),
   i.e. when an odd number of quadwords remains.  */
132 subq a2, v0, t3 # t3 <- number of quads to be processed in loop
133 and t3, 8, t3 # odd number of quads?
134 bne t3, $odd_quad_count # t3 != 0: enter at the odd-count label
136 /* at least three quads remain to be accessed */
/* The loop alternates t0 and t3 as the "current" and "prefetched"
   quadword.  Both visible loads use offset 8 from v0; presumably v0 is
   advanced between them on lines not shown -- TODO confirm.  */
138 mov t0, t3 # move prefetched value into correct register
142 ldq t0, 8(v0) # prefetch t0
150 ldq t3, 8(v0) # prefetch t3
156 bne t5, $unrolled_loop # loop again while t5 != 0 (t5 is set on a line not shown)
158 mov t3, t0 # move prefetched value into t0
/* t4 is set on a line not visible here; per the comment below, t4 - v0 is
   the count of bytes still to examine.  */
159 $final: subq t4, v0, a2 # a2 <- number of bytes left to do