2 * regex.c: String and regex operations for odt2txt
4 * Copyright (c) 2006-2009 Dennis Stosberg <dennis@stosberg.net>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License,
8 * version 2 as published by the Free Software Foundation
/* Forward declarations of file-local (static) helpers that are defined further down in this file. */
16 static char *headline(char line
, const char *buf
, regmatch_t matches
[],
17 size_t nmatch
, size_t off
);
18 static size_t charlen_utf8(const char *s
);
/*
 * Print the message for a regex error code on stderr.
 *
 * regerror() renders the text for reg_errno (in the context of the compiled
 * pattern rx) into a BUF_SZ-byte buffer obtained from ymalloc(); the text is
 * then written to stderr followed by a newline.
 *
 * NOTE(review): the release of buf is not visible in this excerpt -- confirm
 * that the buffer is freed before the function returns.
 */
20 static void print_regexp_err(int reg_errno
, const regex_t
*rx
)
22 char *buf
= ymalloc(BUF_SZ
);
24 regerror(reg_errno
, rx
, buf
, BUF_SZ
);
25 fprintf(stderr
, "%s\n", buf
);
/*
 * Substitute matches of `regex` inside the string buffer `buf`.
 *
 * What the visible lines establish:
 *  - the pattern is compiled with regcomp() using REG_EXTENDED only; regopt
 *    is not passed to regcomp() and is consulted for _REG_EXEC and
 *    _REG_GLOBAL only;
 *  - matching starts at byte offset `off` into the buffer contents and up to
 *    10 regmatch_t slots are filled;
 *  - when _REG_EXEC is set, `subst` is cast to a function taking
 *    (buf, matches, nmatch, off) and called to obtain the replacement;
 *  - the span [rm_so + off, rm_eo + off) of sub-match i is replaced through
 *    strbuf_subst();
 *  - the search repeats while _REG_GLOBAL is set.
 *
 * NOTE(review): the last parameter (`subst`), the declarations of r, rx, i,
 * off, bufp and subst_len, the head of the do-loop, the cleanup and the
 * return value are not part of this excerpt -- confirm against the full file.
 */
30 int regex_subst(STRBUF
*buf
,
31 const char *regex
, int regopt
,
41 const size_t nmatches
= 10;
42 regmatch_t matches
[10];
/* Compile the pattern as a POSIX extended regular expression. */
44 r
= regcomp(&rx
, regex
, REG_EXTENDED
);
/* Report the regcomp() result r via print_regexp_err(); the guarding condition is not visible here. */
46 print_regexp_err(r
, &rx
);
/* Guard against a start offset beyond the end of the buffer (the action taken is not visible here). */
51 if (off
> strbuf_len(buf
))
/* Search from the current offset into the buffer contents. */
54 bufp
= strbuf_get(buf
) + off
;
/* End of the searchable range, relative to bufp, for the REG_STARTEND call below. */
58 matches
[0].rm_eo
= strbuf_len(buf
) - off
;
/*
 * NOTE(review): two regexec() calls are visible, one with REG_STARTEND and
 * one with flags 0; presumably they are alternatives selected by a
 * preprocessor conditional that is not shown -- confirm.
 */
60 if (0 != regexec(&rx
, bufp
, nmatches
, matches
, REG_STARTEND
))
62 if (0 != regexec(&rx
, bufp
, nmatches
, matches
, 0))
/* Only act when sub-match i participated in the match (rm_so is -1 otherwise). */
66 if (matches
[i
].rm_so
!= -1) {
/* With _REG_EXEC, subst is a callback that generates the replacement text. */
70 if (regopt
& _REG_EXEC
) {
72 (const char *buf
, regmatch_t matches
[],
73 size_t nmatch
, size_t off
))subst
)
74 (strbuf_get(buf
), matches
, nmatches
, off
);
/* Splice the replacement over the matched span; match offsets are relative to bufp, hence "+ off". */
78 subst_len
= strbuf_subst(buf
,
79 matches
[i
].rm_so
+ off
,
80 matches
[i
].rm_eo
+ off
,
/* NOTE(review): presumably releases the callback-generated replacement -- the statement itself is not visible. */
84 if (regopt
& _REG_EXEC
)
/* Advance the search offset by the start of the match just handled. */
87 off
+= matches
[i
].rm_so
;
/* Keep going only for global substitutions. */
91 } while (regopt
& _REG_GLOBAL
);
97 int regex_rm(STRBUF
*buf
,
98 const char *regex
, int regopt
)
100 return regex_subst(buf
, regex
, regopt
, "");
/*
 * Build a string made of `str`, a newline, a run of `charlen` bytes and two
 * further newlines, where charlen = charlen_utf8(str). The result is handed
 * back through strbuf_spit(line).
 *
 * An empty `str` is special-cased before anything is appended.
 *
 * NOTE(review): the body of the empty-string branch, the creation of `line`,
 * the statement that fills tmp (presumably with linechar) and the release of
 * tmp are not visible in this excerpt -- confirm against the full file.
 */
103 char *underline(char linechar
, const char *str
)
108 size_t charlen
= charlen_utf8(str
);
110 if (str
[0] == '\0') {
/* First output line: the text itself. */
117 strbuf_append(line
, str
);
118 strbuf_append(line
, "\n");
/* Second output line: charlen bytes, one per character counted by charlen_utf8(). */
120 tmp
= ymalloc(charlen
);
121 for (i
= 0; i
< charlen
; i
++) {
/* tmp is not NUL-terminated, so it is appended with an explicit length. */
124 strbuf_append_n(line
, tmp
, charlen
);
127 strbuf_append(line
, "\n\n");
128 return strbuf_spit(line
);
/*
 * Turn sub-match i of a regex match into an underlined heading.
 *
 * The matched text is the len = rm_eo - rm_so bytes starting at
 * buf + matches[i].rm_so + off. It is copied into a (len + 1)-byte buffer
 * from ymalloc() and passed to underline() with `line` as the line character.
 *
 * NOTE(review): the declaration/value of i, the NUL termination of `match`,
 * its release and the return statement are not visible in this excerpt --
 * confirm against the full file.
 */
131 static char *headline(char line
, const char *buf
, regmatch_t matches
[],
132 size_t nmatch
, size_t off
)
139 len
= matches
[i
].rm_eo
- matches
[i
].rm_so
;
/* One extra byte beyond the copied text. */
140 match
= ymalloc(len
+ 1);
/* Match offsets are relative to the search start, hence "+ off". */
142 memcpy(match
, buf
+ matches
[i
].rm_so
+ off
, len
);
145 result
= underline(line
, match
);
/*
 * Heading generator that uses '=' as the line character.
 *
 * The signature matches the callback regex_subst() invokes when _REG_EXEC
 * is set. All four arguments are forwarded unchanged to headline(), whose
 * result is returned as-is.
 */
char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	char *heading = headline('=', buf, matches, nmatch, off);

	return heading;
}
/*
 * Heading generator that uses '-' as the line character.
 *
 * The signature matches the callback regex_subst() invokes when _REG_EXEC
 * is set. All four arguments are forwarded unchanged to headline(), whose
 * result is returned as-is.
 */
char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	char *heading = headline('-', buf, matches, nmatch, off);

	return heading;
}
/*
 * Build the placeholder text for an image: "[-- Image: <name> --]".
 *
 * <name> is the text of sub-match i, i.e. the bytes
 * buf[off + matches[i].rm_so .. off + matches[i].rm_eo).
 *
 * buf      text the regex was matched against
 * matches  match offsets, relative to buf + off
 * nmatch   number of entries in matches (not consulted here)
 * off      offset into buf at which the search started
 *
 * Returns a NUL-terminated string allocated with ymalloc(); the caller is
 * presumably responsible for releasing it.
 */
char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
{
	/*
	 * NOTE(review): the declaration of i is not part of the visible
	 * excerpt; sub-match 1 (the first capture group) is assumed -- confirm.
	 */
	const int i = 1;
	const char *prefix = "[-- Image: ";
	const char *postfix = " --]";
	size_t pr_len, po_len, len;
	char *match;

	pr_len = strlen(prefix);
	len = matches[i].rm_eo - matches[i].rm_so;
	/*
	 * Must be the length of the postfix, not of the prefix: measuring the
	 * (longer) prefix here made the last memcpy() read past the end of the
	 * postfix literal and put stray bytes in front of the terminator.
	 */
	po_len = strlen(postfix);

	match = ymalloc(pr_len + len + po_len + 1);
	memcpy(match, prefix, pr_len);
	memcpy(match + pr_len, buf + matches[i].rm_so + off, len);
	memcpy(match + pr_len + len, postfix, po_len);
	match[pr_len + len + po_len] = '\0';

	return match;
}
/*
 * Compute a size_t length for the string s; judging by the name and by its
 * use in underline(), presumably the number of UTF-8 characters rather than
 * bytes -- confirm.
 *
 * The string is walked through an unsigned char pointer so that byte values
 * of 0x80 and above can be used to index the utf8_length table, which is
 * defined outside this excerpt.
 *
 * NOTE(review): the loop, the condition guarding the table lookup and the
 * return statement are not visible here.
 */
182 static size_t charlen_utf8(const char *s
)
185 unsigned char *t
= (unsigned char*) s
;
/* Skip ahead by the table entry for this lead byte; the table index is the byte value minus 0x80. */
188 t
+= utf8_length
[*t
- 0x80];
/*
 * Produce a line-wrapped copy of `buf` in a newly created STRBUF.
 *
 * What the visible lines establish:
 *  - output goes to `out`, obtained from strbuf_new();
 *  - runs of '\n' in the input flush the pending text and emit one line
 *    feed per newline;
 *  - when a space has been seen and the current line is longer than
 *    `width`, the text up to that space is emitted followed by a line feed;
 *  - bytes above 0x80 advance the scan pointer by an entry of the
 *    utf8_length table (defined outside this excerpt);
 *  - a single "\n" is appended at the end.
 *
 * NOTE(review): several declarations, branch heads and the end of the
 * function (including its return) are not visible in this excerpt --
 * confirm details against the full file.
 */
195 STRBUF
*wrap(STRBUF
*buf
, int width
)
197 const char *lf
= "\n";
198 const size_t lflen
= strlen(lf
);
/* Position of the most recent space on the current line; 0 (null) means none seen yet. */
201 const char *lastspace
= 0;
203 STRBUF
*out
= strbuf_new();
205 bufp
= strbuf_get(buf
);
/* Copies the whole input unchanged; NOTE(review): the condition guarding this is not visible. */
209 strbuf_append_n(out
, strbuf_get(buf
), strbuf_len(buf
));
213 strbuf_append_n(out
, lf
, lflen
);
/* Scan until bufp has moved past the last byte of the input. */
214 while(bufp
- strbuf_get(buf
) < (ptrdiff_t)strbuf_len(buf
)) {
/* Newline in the input: flush the pending text [last, bufp). */
217 else if (*bufp
== '\n') {
218 strbuf_append_n(out
, last
, (size_t)(bufp
- last
));
/* One line feed is written per consecutive input newline. */
220 strbuf_append_n(out
, lf
, lflen
);
221 } while (*++bufp
== '\n');
224 while(*bufp
== ' ') {
/* Line too long and a break opportunity exists: emit the text up to the last space, then a line feed. */
231 if (NULL
!= lastspace
&& (int)linelen
> width
) {
232 strbuf_append_n(out
, last
, (size_t)(lastspace
- last
));
233 strbuf_append_n(out
, lf
, lflen
);
/* Length of the text carried over onto the new line. */
236 linelen
= (size_t)(bufp
- last
);
238 while(*last
== ' ') {
/* Multi-byte sequence: step by the table entry for this byte (index is the byte value minus 0x80). */
247 if ((unsigned char)*bufp
> 0x80)
248 bufp
+= utf8_length
[(unsigned char)*bufp
- 0x80];
250 strbuf_append_n(out
, "\n", 1);