3 /* search.c -- searching large bodies of text.
4 Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp
6 Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 Written by Brian Fox (bfox@ai.mit.edu). */
29 /* The search functions take two arguments:
31 1) a string to search for, and
33 2) a pointer to a SEARCH_BINDING which contains the buffer, start,
34 and end of the search.
36 They return a long, which is the offset from the start of the buffer
37 at which the match was found. An offset of -1 indicates failure. */
39 /* A function which makes a binding with buffer and bounds. */
41 make_binding (char *buffer
, long int start
, long int end
)
43 SEARCH_BINDING
*binding
;
45 binding
= (SEARCH_BINDING
*)xmalloc (sizeof (SEARCH_BINDING
));
46 binding
->buffer
= buffer
;
47 binding
->start
= start
;
54 /* Make a copy of BINDING without duplicating the data. */
56 copy_binding (SEARCH_BINDING
*binding
)
60 copy
= make_binding (binding
->buffer
, binding
->start
, binding
->end
);
61 copy
->flags
= binding
->flags
;
66 /* **************************************************************** */
68 /* The Actual Searching Functions */
70 /* **************************************************************** */
72 /* Search forwards or backwards for the text delimited by BINDING.
73 The search is forwards if BINDING->start is greater than BINDING->end. */
75 search (char *string
, SEARCH_BINDING
*binding
)
79 /* If the search is backwards, then search backwards, otherwise forwards. */
80 if (binding
->start
> binding
->end
)
81 result
= search_backward (string
, binding
);
83 result
= search_forward (string
, binding
);
88 /* Search forwards for STRING through the text delimited in BINDING. */
90 search_forward (char *string
, SEARCH_BINDING
*binding
)
92 register int c
, i
, len
;
93 register char *buff
, *end
;
94 char *alternate
= (char *)NULL
;
96 len
= strlen (string
);
98 /* We match characters in the search buffer against STRING and ALTERNATE.
99 ALTERNATE is a case reversed version of STRING; this is cheaper than
100 case folding each character before comparison. Alternate is only
101 used if the case folding bit is turned on in the passed BINDING. */
103 if (binding
->flags
& S_FoldCase
)
105 alternate
= xstrdup (string
);
107 for (i
= 0; i
< len
; i
++)
109 if (islower (alternate
[i
]))
110 alternate
[i
] = toupper (alternate
[i
]);
111 else if (isupper (alternate
[i
]))
112 alternate
[i
] = tolower (alternate
[i
]);
116 buff
= binding
->buffer
+ binding
->start
;
117 end
= binding
->buffer
+ binding
->end
+ 1;
119 while (buff
< (end
- len
))
121 for (i
= 0; i
< len
; i
++)
125 if ((c
!= string
[i
]) && (!alternate
|| c
!= alternate
[i
]))
133 if (binding
->flags
& S_SkipDest
)
135 return ((long) (buff
- binding
->buffer
));
147 /* Search for STRING backwards through the text delimited in BINDING. */
149 search_backward (char *input_string
, SEARCH_BINDING
*binding
)
151 register int c
, i
, len
;
152 register char *buff
, *end
;
154 char *alternate
= (char *)NULL
;
156 len
= strlen (input_string
);
158 /* Reverse the characters in the search string. */
159 string
= (char *)xmalloc (1 + len
);
160 for (c
= 0, i
= len
- 1; input_string
[c
]; c
++, i
--)
161 string
[i
] = input_string
[c
];
165 /* We match characters in the search buffer against STRING and ALTERNATE.
166 ALTERNATE is a case reversed version of STRING; this is cheaper than
167 case folding each character before comparison. ALTERNATE is only
168 used if the case folding bit is turned on in the passed BINDING. */
170 if (binding
->flags
& S_FoldCase
)
172 alternate
= xstrdup (string
);
174 for (i
= 0; i
< len
; i
++)
176 if (islower (alternate
[i
]))
177 alternate
[i
] = toupper (alternate
[i
]);
178 else if (isupper (alternate
[i
]))
179 alternate
[i
] = tolower (alternate
[i
]);
183 buff
= binding
->buffer
+ binding
->start
- 1;
184 end
= binding
->buffer
+ binding
->end
;
186 while (buff
> (end
+ len
))
188 for (i
= 0; i
< len
; i
++)
192 if (c
!= string
[i
] && (!alternate
|| c
!= alternate
[i
]))
202 if (binding
->flags
& S_SkipDest
)
204 return ((long) (1 + (buff
- binding
->buffer
)));
217 /* Find STRING in LINE, returning the offset of the end of the string.
218 Return an offset of -1 if STRING does not appear in LINE. The search
219 is bound by the end of the line (i.e., either NEWLINE or 0). */
221 string_in_line (char *string
, char *line
)
224 SEARCH_BINDING binding
;
226 /* Find the end of the line. */
227 for (end
= 0; line
[end
] && line
[end
] != '\n'; end
++);
229 /* Search for STRING within these confines. */
230 binding
.buffer
= line
;
233 binding
.flags
= S_FoldCase
| S_SkipDest
;
235 return (search_forward (string
, &binding
));
238 /* Return non-zero if STRING is the first text to appear at BINDING. */
240 looking_at (char *string
, SEARCH_BINDING
*binding
)
244 search_end
= search (string
, binding
);
246 /* If the string was not found, SEARCH_END is -1. If the string was found,
247 but not right away, SEARCH_END is != binding->start. Otherwise, the
248 string was found at binding->start. */
249 return (search_end
== binding
->start
);
252 /* **************************************************************** */
254 /* Small String Searches */
256 /* **************************************************************** */
258 /* Function names that start with "skip" are passed a string, and return
259 an offset from the start of that string. Function names that start
260 with "find" are passed a SEARCH_BINDING, and return an absolute position
261 marker of the item being searched for. "Find" functions return a value
262 of -1 if the item being looked for couldn't be found. */
264 /* Return the index of the first non-whitespace character in STRING. */
266 skip_whitespace (char *string
)
270 for (i
= 0; string
&& whitespace (string
[i
]); i
++);
274 /* Return the index of the first non-whitespace or newline character in
277 skip_whitespace_and_newlines (char *string
)
281 for (i
= 0; string
&& whitespace_or_newline (string
[i
]); i
++);
285 /* Return the index of the first whitespace character in STRING. */
287 skip_non_whitespace (char *string
)
291 for (i
= 0; string
&& string
[i
] && !whitespace (string
[i
]); i
++);
295 /* Return the index of the first non-node character in STRING. Note that
296 this function contains quite a bit of hair to ignore periods in some
297 special cases. This is because we here at GNU ship some info files which
298 contain nodenames that contain periods. No such nodename can start with
299 a period, or continue with whitespace, newline, or ')' immediately following
300 the period. If second argument NEWLINES_OKAY is non-zero, newlines should
301 be skipped while parsing out the nodename specification. */
303 skip_node_characters (char *string
, int newlines_okay
)
305 register int c
, i
= 0;
309 /* Handle special case. This is when another function has parsed out the
310 filename component of the node name, and we just want to parse out the
311 nodename proper. In that case, a period at the start of the nodename
312 indicates an empty nodename. */
313 if (string
&& *string
== '.')
316 if (string
&& *string
== '(')
323 for (; string
&& (c
= string
[i
]); i
++)
335 /* If the character following the close paren is a space or period,
336 then this node name has no more characters associated with it. */
340 ((!newlines_okay
) && (c
== '\n')) ||
341 ((paren_seen
&& string
[i
- 1] == ')') &&
342 (c
== ' ' || c
== '.')) ||
346 /* This test causes a node name ending in a period, like `This.', not to
347 be found. The trailing . is stripped. This occurs in the jargon
348 file (`I see no X here.' is a node name). */
351 (whitespace_or_newline (string
[i
+ 1])) ||
352 (string
[i
+ 1] == ')'))))
359 /* **************************************************************** */
361 /* Searching FILE_BUFFER's */
363 /* **************************************************************** */
365 /* Return the absolute position of the first occurence of a node separator in
366 BINDING-buffer. The search starts at BINDING->start. Return -1 if no node
367 separator was found. */
369 find_node_separator (SEARCH_BINDING
*binding
)
374 body
= binding
->buffer
;
376 /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are
377 optional, but the DELETE and NEWLINE are not. This separator holds
378 true for all separated elements in an Info file, including the tags
379 table (if present) and the indirect tags table (if present). */
380 for (i
= binding
->start
; i
< binding
->end
- 1; i
++)
381 if (((body
[i
] == INFO_FF
&& body
[i
+ 1] == INFO_COOKIE
) &&
382 (body
[i
+ 2] == '\n' ||
383 (body
[i
+ 2] == INFO_FF
&& body
[i
+ 3] == '\n'))) ||
384 ((body
[i
] == INFO_COOKIE
) &&
385 (body
[i
+ 1] == '\n' ||
386 (body
[i
+ 1] == INFO_FF
&& body
[i
+ 2] == '\n'))))
391 /* Return the length of the node separator characters that BODY is
392 currently pointing at. */
394 skip_node_separator (char *body
)
400 if (body
[i
] == INFO_FF
)
403 if (body
[i
++] != INFO_COOKIE
)
406 if (body
[i
] == INFO_FF
)
409 if (body
[i
++] != '\n')
415 /* Return the number of characters from STRING to the start of
418 skip_line (char *string
)
422 for (i
= 0; string
&& string
[i
] && string
[i
] != '\n'; i
++);
424 if (string
[i
] == '\n')
430 /* Return the absolute position of the beginning of a tags table in this
431 binding starting the search at binding->start. */
433 find_tags_table (SEARCH_BINDING
*binding
)
435 SEARCH_BINDING tmp_search
;
438 tmp_search
.buffer
= binding
->buffer
;
439 tmp_search
.start
= binding
->start
;
440 tmp_search
.end
= binding
->end
;
441 tmp_search
.flags
= S_FoldCase
;
443 while ((position
= find_node_separator (&tmp_search
)) != -1 )
445 tmp_search
.start
= position
;
446 tmp_search
.start
+= skip_node_separator (tmp_search
.buffer
449 if (looking_at (TAGS_TABLE_BEG_LABEL
, &tmp_search
))
455 /* Return the absolute position of the node named NODENAME in BINDING.
456 This is a brute force search, and we wish to avoid it when possible.
457 This function is called when a tag (indirect or otherwise) doesn't
458 really point to the right node. It returns the absolute position of
459 the separator preceding the node. */
461 find_node_in_binding (char *nodename
, SEARCH_BINDING
*binding
)
465 SEARCH_BINDING tmp_search
;
467 namelen
= strlen (nodename
);
469 tmp_search
.buffer
= binding
->buffer
;
470 tmp_search
.start
= binding
->start
;
471 tmp_search
.end
= binding
->end
;
472 tmp_search
.flags
= 0;
474 while ((position
= find_node_separator (&tmp_search
)) != -1)
476 tmp_search
.start
= position
;
477 tmp_search
.start
+= skip_node_separator
478 (tmp_search
.buffer
+ tmp_search
.start
);
480 offset
= string_in_line
481 (INFO_NODE_LABEL
, tmp_search
.buffer
+ tmp_search
.start
);
486 tmp_search
.start
+= offset
;
487 tmp_search
.start
+= skip_whitespace (tmp_search
.buffer
+ tmp_search
.start
);
488 offset
= skip_node_characters
489 (tmp_search
.buffer
+ tmp_search
.start
, DONT_SKIP_NEWLINES
);
491 /* Notice that this is an exact match. You cannot grovel through
492 the buffer with this function looking for random nodes. */
493 if ((offset
== namelen
) &&
494 (tmp_search
.buffer
[tmp_search
.start
] == nodename
[0]) &&
495 (strncmp (tmp_search
.buffer
+ tmp_search
.start
, nodename
, offset
) == 0))