/* pcresearch.c - searching subroutines using PCRE for grep.
   Copyright 2000, 2007, 2009-2010 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Written August 1992 by Mike Haertel. */
25 #elif HAVE_PCRE_PCRE_H
26 # include <pcre/pcre.h>
30 /* Compiled internal form of a Perl regular expression. */
/* NOTE(review): the declaration that the comment above describes is not
   present in this listing; Pcompile assigns the result of pcre_compile
   to `cre', so it is presumably the missing object -- confirm against
   the complete file.  */
33 /* Additional information about the pattern. */
/* Set from pcre_study in Pcompile and passed as the second argument of
   pcre_exec in Pexecute.  */
34 static pcre_extra
*extra
;
/* Pcompile: prepare PATTERN (SIZE bytes, possibly containing NUL bytes)
   for use with PCRE.

   Visible behavior: a pattern containing a newline is rejected; the
   pattern is copied piecewise into the scratch buffer RE, split at each
   NUL byte; RE is compiled with pcre_compile into `cre'; the compiled
   pattern is passed to pcre_study and the result stored in the
   file-scope `extra'.  Problems are reported through
   error (EXIT_TROUBLE, ...).

   NOTE(review): this listing omits a number of original lines (braces,
   the declarations of e, ep, n, p and pnul, the statements that advance
   n, and the conditions guarding some error calls).  The comments below
   describe only what is visible.  */
38 Pcompile (char const *pattern
, size_t size
)
/* NOTE(review): judging by the message, this call is presumably compiled
   only when PCRE support is unavailable; the guard is not visible here
   -- confirm.  */
41 error (EXIT_TROUBLE
, 0, "%s",
42 _("support for the -P option is not compiled into "
43 "this --disable-perl-regexp binary"));
/* Scratch buffer for the rewritten pattern.  4 * size covers the worst
   case in which every byte is a NUL expanded to four characters (see
   the NUL-handling comment below); the extra 7 bytes are presumably for
   text added around the pattern plus a terminator -- that code is not
   visible here, confirm.  */
47 char *re
= xmalloc (4 * size
+ 7);
/* Always compile with PCRE_MULTILINE; add PCRE_CASELESS when the global
   match_icase is set.  */
48 int flags
= PCRE_MULTILINE
| (match_icase
? PCRE_CASELESS
: 0);
/* One past the last byte of the pattern.  */
49 char const *patlim
= pattern
+ size
;
54 /* FIXME: Remove these restrictions. */
/* A newline anywhere in the pattern is a fatal error.  */
55 if (memchr(pattern
, '\n', size
))
56 error (EXIT_TROUBLE
, 0, _("the -P option only supports a single pattern"));
65 /* The PCRE interface doesn't allow NUL bytes in the pattern, so
66 replace each NUL byte in the pattern with the four characters
67 "\000", removing a preceding backslash if there are an odd
68 number of backslashes before the NUL.
70 FIXME: This method does not work with some multibyte character
71 encodings, notably Shift-JIS, where a multibyte character can end
72 in a backslash byte. */
/* Each iteration finds the next NUL at or after p; the next iteration
   resumes just past it.  */
73 for (p
= pattern
; (pnul
= memchr (p
, '\0', patlim
- p
)); p
= pnul
+ 1)
/* Copy the bytes between the previous position and this NUL to n
   (n appears to be the write cursor into re; its declaration and
   updates are not visible here).  */
75 memcpy (n
, p
, pnul
- p
);
/* Move p backward from the NUL over the run of backslashes that
   immediately precedes it, stopping at the start of the pattern.  */
77 for (p
= pnul
; pattern
< p
&& p
[-1] == '\\'; p
--)
/* Copy whatever follows the last NUL (the whole pattern when no NUL was
   found).  */
84 memcpy (n
, p
, patlim
- p
);
/* Compile the rewritten pattern.  &ep and &e are the error-message and
   error-offset outputs of pcre_compile; the last argument is the
   character table returned by pcre_maketables.  */
92 cre
= pcre_compile (re
, flags
, &ep
, &e
, pcre_maketables ());
/* Report the message in ep.  NOTE(review): the condition guarding this
   call is not visible here; presumably a failed pcre_compile.  */
94 error (EXIT_TROUBLE
, 0, "%s", ep
);
/* Study the compiled pattern; the result is kept in `extra'.  */
96 extra
= pcre_study (cre
, 0, &ep
);
/* Report the message in ep.  NOTE(review): the guarding condition is not
   visible here; presumably pcre_study reporting an error through ep.  */
98 error (EXIT_TROUBLE
, 0, "%s", ep
);
/* Pexecute: search the SIZE bytes at BUF for a match of the pattern
   compiled by Pcompile, one line at a time.

   BUF/SIZE    the text to search.
   MATCH_SIZE  output; set to end - beg once a match has been narrowed
               down.
   START_PTR   when non-null, a position inside BUF; its offset from
               BUF is used as the starting offset for pcre_exec.

   NOTE(review): this listing omits a number of original lines (braces,
   the declarations of sub and eol, an `else', the switch header, and
   the return statements), so the return value and several branch
   bodies cannot be seen here.  */
105 Pexecute (char const *buf
, size_t size
, size_t *match_size
,
106 char const *start_ptr
)
112 /* This array must have at least two elements; everything after that
113 is just for performance improvement in pcre_exec. */
/* Start of the current line, its terminator position, and the start of
   the following line.  */
116 const char *line_buf
, *line_end
, *line_next
;
/* Starting at NOMATCH lets the loop condition below hold on entry.  */
117 int e
= PCRE_ERROR_NOMATCH
;
/* Offset of start_ptr from buf, or 0 when no start position is given.  */
118 ptrdiff_t start_ofs
= start_ptr
? start_ptr
- buf
: 0;
120 /* PCRE can't limit the matching to single lines, therefore we have to
121 match each line in the buffer separately. */
/* Continue while the last result is NOMATCH and input remains.  After
   each line, start_ofs is reduced by that line's length (terminator
   included when present) so that it stays relative to the next
   line_buf.  */
122 for (line_next
= buf
;
123 e
== PCRE_ERROR_NOMATCH
&& line_next
< buf
+ size
;
124 start_ofs
-= line_next
- line_buf
)
126 line_buf
= line_next
;
/* Find the end-of-line byte that terminates the current line.  */
127 line_end
= memchr (line_buf
, eolbyte
, (buf
+ size
) - line_buf
);
/* No terminator left: the rest of the buffer is the last line.  */
128 if (line_end
== NULL
)
129 line_next
= line_end
= buf
+ size
;
/* NOTE(review): presumably the `else' branch (the keyword is not shown
   in this listing): the next line begins just past the terminator.  */
131 line_next
= line_end
+ 1;
/* The requested start position lies at or beyond the end of this line.
   The statement executed in that case is not visible here.  */
133 if (start_ptr
&& start_ptr
>= line_end
)
/* Match within the current line only.  start_ofs becomes negative once
   the loop has advanced past the start position, so it is clamped to 0.
   The capture offsets are written to sub.  */
136 e
= pcre_exec (cre
, extra
, line_buf
, line_end
- line_buf
,
137 start_ofs
< 0 ? 0 : start_ofs
, 0,
138 sub
, sizeof sub
/ sizeof *sub
);
/* Result handling.  The statement for the NOMATCH case is not visible
   in this listing.  */
145 case PCRE_ERROR_NOMATCH
:
/* Out of memory inside PCRE is reported as a fatal error.  */
148 case PCRE_ERROR_NOMEMORY
:
149 error (EXIT_TROUBLE
, 0, _("memory exhausted"));
157 /* Narrow down to the line we've found. */
/* sub[0] and sub[1] are the start and end offsets of the match relative
   to line_buf.  */
158 char const *beg
= line_buf
+ sub
[0];
159 char const *end
= line_buf
+ sub
[1];
160 char const *buflim
= buf
+ size
;
/* The remaining lines search forward from end for the eol byte, test
   backward from beg for a preceding eol byte, and finally store
   end - beg in *match_size.  The statements taken on each memchr
   outcome and the body of the while loop are not visible in this
   listing.  */
164 /* FIXME: The case when '\n' is not found indicates a bug:
165 Since grep is line oriented, the match should never contain
166 a newline, so there _must_ be a newline following.
168 if (!(end
= memchr (end
, eol
, buflim
- end
)))
172 while (buf
< beg
&& beg
[-1] != eol
)
176 *match_size
= end
- beg
;