/*
 * regex.c: String and regex operations for odt2txt
 *
 * Copyright (c) 2006-2009 Dennis Stosberg <dennis@stosberg.net>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation
 */
/*
 * Option values passed as the regopt argument of the regex helpers
 * declared in this file.
 *
 * NOTE(review): identifiers that begin with an underscore followed by an
 * uppercase letter are reserved for the implementation (C11 7.1.3).  The
 * names are kept unchanged because code outside this file may use them.
 */
#define _REG_DEFAULT 0 /* Stop after first match, to be removed */
#define _REG_GLOBAL  1 /* Find all matches of regexp */
#define _REG_EXEC    2 /* subst is a function pointer */
27 * Deletes match(es) of regex from *buf.
29 * Returns the number of matches that were deleted.
31 int regex_rm(STRBUF
*buf
,
32 const char *regex
, int regopt
);
35 * Replaces match(es) of regex from *buf with subst.
37 int regex_subst(STRBUF
*buf
,
38 const char *regex
, int regopt
,
/*
 * Returns a pointer to a new string with two lines. The first line
 * contains str, the second line contains strlen(str) copies of
 * linechar.
 *
 * linechar: character used to draw the second line
 * str:      text to be underlined
 *
 * NOTE(review): the result is described as a new string; presumably the
 * caller is responsible for freeing it -- confirm against the definition.
 */
char *underline(char linechar, const char *str);
/*
 * Wrappers around underline, to be used as argument to regex_subst
 * when regopt is _REG_EXEC.
 *
 * They replace the match in buf with underline('=',match) or
 * underline('-',match) respectively.
 *
 * buf:     text that was matched against
 * matches: match positions as filled in by the regex engine
 * nmatch:  number of entries in matches
 * off:     offset argument; presumably the position in buf that the
 *          offsets in matches are relative to -- confirm against the
 *          definitions
 */
char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off);
char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off);
/*
 * Replace match with the name of the image frame
 *
 * Takes the same arguments as h1 and h2, so it is presumably also meant
 * to be passed to regex_subst together with _REG_EXEC -- confirm against
 * the callers.
 */
char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off);
64 * Copies the contents of buf to a new string buffer, wrapped to a
65 * maximal line width of width characters.
67 STRBUF
*wrap(STRBUF
*buf
, int width
);
70 * number of characters that follow in the byte sequence
72 static const char utf8_length
[128] =
74 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
75 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
76 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
77 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
78 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
79 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
80 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
81 3,3,3,3,3,3,3,3,4,4,4,4,5,5,0,0 /* 0xf0-0xff */