THANKS: Coverity.com (overdue)
[s-mailx.git] / src / mx / filter-quote.c
blobfda34541fcd50708415bd7290575c823235dbd2c
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ struct quoteflt: quotation (sub) filter.
3 *@ TODO quotation filter: anticipate in future data: don't break if only WS
4 *@ TODO or a LF escaping \ follows on the line (simply reuse the latter).
6 * Copyright (c) 2012/3 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 * SPDX-License-Identifier: ISC
9 * Permission to use, copy, modify, and/or distribute this software for any
10 * purpose with or without fee is hereby granted, provided that the above
11 * copyright notice and this permission notice appear in all copies.
13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 #undef su_FILE
22 #define su_FILE filter_quote
23 #define mx_SOURCE
25 #ifndef mx_HAVE_AMALGAMATION
26 # include "mx/nail.h"
27 #endif
29 #include <su/cs.h>
31 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
32 # ifdef mx_HAVE_C90AMEND1
33 # include <wchar.h>
34 # include <wctype.h>
35 # endif
37 # include <su/icodec.h>
38 #endif
40 #include <su/mem.h>
42 #include "mx/filter-quote.h"
43 /* TODO fake */
44 #include "su/code-in.h"
46 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
47 CTAV(n_QUOTE_MAX > 3);
/* Parser state of the folding quote filter (struct quoteflt.qf_state).
 * quoteflt_push() dispatches _QF_CLEAN and _QF_PREFIX to _qf_state_prefix()
 * and _QF_DATA to _qf_state_data(). */
49 enum qf_state {
50 _QF_CLEAN, /* Set by quoteflt_reset() */
51 _QF_PREFIX, /* Set by _qf_add_data() once a newline ended a line */
52 _QF_DATA /* Set by _qf_state_prefix() once the quote prefix is parsed */
/* Input cursor handed from quoteflt_push() to the state handlers.
 * Each handler consumes bytes from .buf and stores the advanced position and
 * the remaining length back before it returns. */
55 struct qf_vc {
56 struct quoteflt *self; /* The filter being driven */
57 char const *buf; /* Next unconsumed input byte */
58 uz len; /* Number of bytes left at .buf */
61 /* Write the indent prefix and the current quote to the output stream; returns bytes written, or -1 */
62 static sz _qf_dump_prefix(struct quoteflt *self);
64 /* Add one data character to the pending line, folding / flushing as necessary */
65 static sz _qf_add_data(struct quoteflt *self, wchar_t wc);
67 /* State machine handlers: consume input from the cursor, return bytes written or a negative value */
68 static sz _qf_state_prefix(struct qf_vc *vc);
69 static sz _qf_state_data(struct qf_vc *vc);
71 static sz
72 _qf_dump_prefix(struct quoteflt *self)
74 sz rv;
75 uz i;
76 NYD_IN;
78 if ((i = self->qf_pfix_len) > 0 && i != fwrite(self->qf_pfix, 1, i,
79 self->qf_os))
80 goto jerr;
81 rv = i;
83 if ((i = self->qf_currq.l) > 0 && i != fwrite(self->qf_currq.s, 1, i,
84 self->qf_os))
85 goto jerr;
86 rv += i;
87 jleave:
88 NYD_OU;
89 return rv;
90 jerr:
91 rv = -1;
92 goto jleave;
/* Append one wide character to the pending output line (.qf_dat) and fold or
 * flush that line when necessary.
 *
 * - L'\n' ends the line: the buffer is flushed via quoteflt_flush() and the
 *   filter returns to _QF_PREFIX with the collected quote discarded.
 * - L'\r' is dropped.
 * - L'\t' is replaced by spaces, each added through a recursive call.
 * - A character for which wcwidth() yields -1, or which wctomb() cannot
 *   convert, is stored as a single '?' of width 1.
 * - A character that would exceed .qf_qfold_max is held back (w == -1): the
 *   line is folded first, then the character is re-added recursively.
 *
 * Returns what quoteflt_flush() reported (plus the results of recursive
 * calls), 0 if nothing was flushed, or a negative value on write error.
 *
 * NOTE(review): characters with wcwidth() == 0 grow .qf_dat.l without growing
 * .qf_datw, so a long run of them never reaches the fold limit; this looks
 * like it can outgrow the buffer sized in quoteflt_init() -- confirm. */
95 static sz
96 _qf_add_data(struct quoteflt *self, wchar_t wc)
98 int w, l;
99 char *save_b;
100 u32 save_l, save_w;
101 sz rv;
102 NYD_IN;
104 rv = 0;
105 save_l = save_w = 0; /* silence cc */
106 save_b = NULL;
108 /* <newline> ends state */
109 if (wc == L'\n') {
110 w = 0;
111 goto jflush;
113 if (wc == L'\r') /* TODO CR should be stripped in lower level!! */
114 goto jleave;
116 /* Unroll <tab> to spaces */
117 if (wc == L'\t') {
/* The mask arithmetic rounds the current column up to the next tab stop; it
 * presumes n_QUOTE_TAB_SPACES is a power of two.  save_l / save_w are only
 * borrowed as scratch variables here. */
118 save_l = self->qf_datw;
119 save_w = (save_l + n_QUOTE_TAB_SPACES) & ~(n_QUOTE_TAB_SPACES - 1);
120 save_w -= save_l;
121 while (save_w-- > 0) {
122 sz j = _qf_add_data(self, L' ');
123 if (j < 0) {
124 rv = j;
125 break;
127 rv += j;
129 goto jleave;
132 /* To avoid that the last visual excesses *qfold-max*, which may happen for
133 * multi-column characters, use w as an indicator for this and move that
134 * thing to the next line */
135 w = wcwidth(wc);
136 if (w == -1) {
137 w = 0;
/* jbad is also entered from the wctomb() failure branch below */
138 jbad:
139 ++self->qf_datw;
140 self->qf_dat.s[self->qf_dat.l++] = '?';
141 } else if (self->qf_datw > self->qf_qfold_max - w) {
142 w = -1;
143 goto jneednl;
144 } else {
145 l = wctomb(self->qf_dat.s + self->qf_dat.l, wc);
146 if (l < 0)
147 goto jbad;
148 self->qf_datw += (u32)w;
149 self->qf_dat.l += (uz)l;
152 if (self->qf_datw >= self->qf_qfold_max) {
153 /* If we have seen a nice breakpoint during traversal, shuffle data
154 * around a bit so as to restore the trailing part after flushing */
155 jneednl:
156 if (self->qf_brkl > 0) {
/* Shift the tail behind the breakpoint two bytes up, which makes room for the
 * reverse solidus and newline written below, and cut the line at the break */
157 save_w = self->qf_datw - self->qf_brkw;
158 save_l = self->qf_dat.l - self->qf_brkl;
159 save_b = self->qf_dat.s + self->qf_brkl + 2;
160 su_mem_move(save_b, save_b - 2, save_l);
161 self->qf_dat.l = self->qf_brkl;
164 self->qf_dat.s[self->qf_dat.l++] = '\\';
/* jflush is entered directly for a real newline, i.e. without the escape */
165 jflush:
166 self->qf_dat.s[self->qf_dat.l++] = '\n';
167 rv = quoteflt_flush(self);
169 /* Restore takeovers, if any */
170 if (save_b != NULL) {
171 self->qf_brk_isws = FAL0;
172 self->qf_datw += save_w;
173 self->qf_dat.l = save_l;
174 su_mem_move(self->qf_dat.s, save_b, save_l);
176 } else if (self->qf_datw >= self->qf_qfold_min && !self->qf_brk_isws) {
/* Past the minimum fold width and no whitespace breakpoint recorded yet:
 * remember this position if it is whitespace, or as a non-whitespace fallback
 * while the recorded one is still below .qf_qfold_maxnws */
177 boole isws = (iswspace(wc) != 0);
179 if (isws || !self->qf_brk_isws || self->qf_brkl == 0) {
180 if((self->qf_brk_isws = isws) ||
181 self->qf_brkl < self->qf_qfold_maxnws){
182 self->qf_brkl = self->qf_dat.l;
183 self->qf_brkw = self->qf_datw;
188 /* Did we hold this back to avoid qf_fold_max excess? Then do it now */
189 if(rv >= 0 && w == -1){
190 sz j = _qf_add_data(self, wc);
191 if(j < 0)
192 rv = j;
193 else
194 rv += j;
196 /* If state changed to prefix, perform full reset (note this implies that
197 * quoteflt_flush() performs too much work..) */
198 else if (wc == '\n') {
199 self->qf_state = _QF_PREFIX;
200 self->qf_wscnt = self->qf_datw = 0;
201 self->qf_currq.l = 0;
203 jleave:
204 NYD_OU;
205 return rv;
208 static sz
209 _qf_state_prefix(struct qf_vc *vc)
211 struct quoteflt *self;
212 sz rv;
213 char const *buf;
214 uz len, i;
215 wchar_t wc;
216 NYD_IN;
218 self = vc->self;
219 rv = 0;
221 for (buf = vc->buf, len = vc->len; len > 0;) {
222 /* xxx NULL BYTE! */
223 i = mbrtowc(&wc, buf, len, self->qf_mbps);
224 if (i == (uz)-1) {
225 /* On hard error, don't modify mbstate_t and step one byte */
226 self->qf_mbps[0] = self->qf_mbps[1];
227 ++buf;
228 --len;
229 self->qf_wscnt = 0;
230 continue;
232 self->qf_mbps[1] = self->qf_mbps[0];
233 if (i == (uz)-2) {
234 /* Redundant shift sequence, out of buffer */
235 len = 0;
236 break;
238 buf += i;
239 len -= i;
241 if (wc == L'\n')
242 goto jfin;
243 if (iswspace(wc)) {
244 ++self->qf_wscnt;
245 continue;
247 if (i == 1 && su_cs_is_ascii(wc) &&
248 su_cs_find_c(self->qf_quote_chars, (char)wc) != NULL){
249 self->qf_wscnt = 0;
250 if (self->qf_currq.l >= n_QUOTE_MAX - 3) {
251 self->qf_currq.s[n_QUOTE_MAX - 3] = '.';
252 self->qf_currq.s[n_QUOTE_MAX - 2] = '.';
253 self->qf_currq.s[n_QUOTE_MAX - 1] = '.';
254 self->qf_currq.l = n_QUOTE_MAX;
255 } else
256 self->qf_currq.s[self->qf_currq.l++] = buf[-1];
257 continue;
260 /* The quote is parsed and compressed; dump it */
261 jfin:
262 self->qf_state = _QF_DATA;
263 /* Overtake WS to the current quote in order to preserve it for eventual
264 * necessary follow lines, too */
265 /* TODO we de-facto "normalize" to ASCII SP here which MESSES tabs!! */
266 while (self->qf_wscnt-- > 0 && self->qf_currq.l < n_QUOTE_MAX)
267 self->qf_currq.s[self->qf_currq.l++] = ' ';
268 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
269 self->qf_wscnt = 0;
270 rv = _qf_add_data(self, wc);
271 break;
274 vc->buf = buf;
275 vc->len = len;
276 NYD_OU;
277 return rv;
280 static sz
281 _qf_state_data(struct qf_vc *vc)
283 struct quoteflt *self;
284 sz rv;
285 char const *buf;
286 uz len, i;
287 wchar_t wc;
288 NYD_IN;
290 self = vc->self;
291 rv = 0;
293 for (buf = vc->buf, len = vc->len; len > 0;) {
294 /* xxx NULL BYTE! */
295 i = mbrtowc(&wc, buf, len, self->qf_mbps);
296 if (i == (uz)-1) {
297 /* On hard error, don't modify mbstate_t and step one byte */
298 self->qf_mbps[0] = self->qf_mbps[1];
299 ++buf;
300 --len;
301 continue;
303 self->qf_mbps[1] = self->qf_mbps[0];
304 if (i == (uz)-2) {
305 /* Redundant shift sequence, out of buffer */
306 len = 0;
307 break;
309 buf += i;
310 len -= i;
312 { sz j = _qf_add_data(self, wc);
313 if (j < 0) {
314 rv = j;
315 break;
317 rv += j;
320 if (self->qf_state != _QF_DATA)
321 break;
324 vc->buf = buf;
325 vc->len = len;
326 NYD_OU;
327 return rv;
329 #endif /* mx_HAVE_FILTER_QUOTE_FOLD */
331 struct quoteflt *
332 quoteflt_dummy(void) /* TODO LEGACY (until filters are plugged when needed) */
334 static struct quoteflt qf_i;
336 qf_i.qf_bypass = TRU1;
337 return &qf_i;
/* Initialize a quote filter object.
 *
 * The object is zeroed, then the prefix (which may be NULL), its length and
 * the bypass flag are stored.  If folding support is compiled in, bypass is
 * not requested and the *quote-fold* variable is set, the fold limits are
 * derived from up to three decimal numbers in its value -- maximum, minimum
 * and maximum-without-whitespace -- and the line and quote buffers are
 * allocated via n_autorec_alloc().
 * NOTE(review): su_idec_u32_cp() presumably stores the parsed number and
 * leaves the position where parsing stopped in xcp; its return value is not
 * checked here -- confirm against its documentation. */
340 void
341 quoteflt_init(struct quoteflt *self, char const *prefix, boole bypass)
343 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
344 char const *xcp, *cp;
345 #endif
346 NYD_IN;
348 su_mem_set(self, 0, sizeof *self);
350 if ((self->qf_pfix = prefix) != NULL)
351 self->qf_pfix_len = (u32)su_cs_len(prefix);
352 self->qf_bypass = bypass;
354 /* Check whether the user wants the more fancy quoting algorithm */
355 /* TODO *quote-fold*: n_QUOTE_MAX may excess it! */
356 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
357 if (!bypass && (cp = ok_vlook(quote_fold)) != NULL) {
358 u32 qmax, qmaxnws, qmin;
360 /* These magic values ensure we don't bail */
361 su_idec_u32_cp(&qmax, cp, 10, &xcp);
362 if (qmax < self->qf_pfix_len + 6)
363 qmax = self->qf_pfix_len + 6;
364 qmaxnws = --qmax; /* The newline escape */
/* Without a second number the minimum defaults to 1/2 + 1/4 + 1/32 of the
 * maximum; otherwise one separator byte is skipped and the given minimum is
 * clamped to the range [qmax / 2, qmax - 2] */
365 if (cp == xcp || *xcp == '\0')
366 qmin = (qmax >> 1) + (qmax >> 2) + (qmax >> 5);
367 else {
368 su_idec_u32_cp(&qmin, &xcp[1], 10, &xcp);
369 if (qmin < qmax >> 1)
370 qmin = qmax >> 1;
371 else if (qmin > qmax - 2)
372 qmin = qmax - 2;
/* An optional third number limits non-whitespace breakpoints; it falls back to
 * qmax unless it lies within [qmin, qmax] */
374 if (cp != xcp && *xcp != '\0') {
375 su_idec_u32_cp(&qmaxnws, &xcp[1], 10, &xcp);
376 if (qmaxnws > qmax || qmaxnws < qmin)
377 qmaxnws = qmax;
380 self->qf_qfold_min = qmin;
381 self->qf_qfold_max = qmax;
382 self->qf_qfold_maxnws = qmaxnws;
383 self->qf_quote_chars = ok_vlook(quote_chars);
385 /* Add pad for takeover copies, reverse solidus and newline */
386 self->qf_dat.s = n_autorec_alloc((qmax + 3) * n_mb_cur_max);
387 self->qf_currq.s = n_autorec_alloc((n_QUOTE_MAX + 1) * n_mb_cur_max);
389 #endif
390 NYD_OU;
393 void
394 quoteflt_destroy(struct quoteflt *self) /* xxx inline */
396 NYD_IN;
397 UNUSED(self);
398 NYD_OU;
401 void
402 quoteflt_reset(struct quoteflt *self, FILE *f) /* xxx inline */
404 NYD_IN;
405 self->qf_os = f;
406 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
407 self->qf_state = _QF_CLEAN;
408 self->qf_dat.l =
409 self->qf_currq.l = 0;
410 su_mem_set(self->qf_mbps, 0, sizeof self->qf_mbps);
411 #endif
412 NYD_OU;
/* Push len bytes at dat through the filter to the output stream .qf_os.
 *
 * Three modes, tested in this order:
 * - bypass (.qf_bypass): the data is written unchanged;
 * - plain prefixing (.qf_qfold_max == 0, or folding not compiled in): the
 *   prefix is written at the beginning of every line;
 * - folding: the data is run through the prefix / data state machine.
 *
 * .qf_nl_last records whether the pushed data ended with a newline.
 * Returns the number of bytes written, 0 for empty input, or -1 on error. */
416 quoteflt_push(struct quoteflt *self, char const *dat, uz len)
418 /* (xxx Ideally the actual push() [and flush()] would be functions on their
419 * xxx own, via indirect vtbl call ..) */
420 sz rv = 0;
421 NYD_IN;
423 self->qf_nl_last = (len > 0 && dat[len - 1] == '\n'); /* TODO HACK */
425 if (len == 0)
426 goto jleave;
428 /* Bypass? TODO Finally, this filter simply should not be used, then
429 * (TODO It supersedes prefix_write() or something) */
430 if (self->qf_bypass) {
431 if (len != fwrite(dat, 1, len, self->qf_os))
432 goto jerr;
433 rv = len;
435 /* Normal: place *indentprefix* at every BOL */
436 else
437 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
438 if (self->qf_qfold_max == 0)
439 #endif
441 void *vp;
442 uz ll;
/* In this mode .qf_qfold_min is reused to remember across calls whether the
 * prefix of the current line has already been written (it is stored back
 * after the loop) */
443 boole pxok = (self->qf_qfold_min != 0);
445 for (;;) {
446 if (!pxok && (ll = self->qf_pfix_len) > 0) {
447 if (ll != fwrite(self->qf_pfix, 1, ll, self->qf_os))
448 goto jerr;
449 rv += ll;
450 pxok = TRU1;
453 /* xxx Strictly speaking this is invalid, because only `/' and `.' are
454 * xxx mandated by POSIX.1-2008 as "invariant across all locales
455 * xxx supported"; though there is no charset known which uses this
456 * xxx control char as part of a multibyte character; note that S-nail
457 * XXX (and the Mail codebase as such) do not support EBCDIC */
458 if ((vp = su_mem_find(dat, '\n', len)) == NULL)
459 ll = len;
460 else {
/* A newline was found: write up to and including it, and request a new prefix
 * for whatever follows */
461 pxok = FAL0;
462 ll = P2UZ((char*)vp - dat) + 1;
465 if (ll != fwrite(dat, sizeof *dat, ll, self->qf_os))
466 goto jerr;
467 rv += ll;
468 if ((len -= ll) == 0)
469 break;
470 dat += ll;
473 self->qf_qfold_min = pxok;
475 /* Overly complicated, though still only line-per-line: *quote-fold*.
476 * - If .qf_currq.l is 0, then we are in a clean state. Reset .qf_mbps;
477 * TODO note this means we assume that lines start with reset escape seq,
478 * TODO but i don't think this is any worse than what we currently do;
479 * TODO in 15.0, with the value carrier, we should carry conversion states
480 * TODO all along, only resetting on error (or at words for header =???=);
481 * TODO this still is weird for error handling, but we need to act more
482 * TODO stream-alike (though in practice i don't think cross-line states
483 * TODO can be found, because of compatibility reasons; however, being
484 * TODO a problem rather than a solution is not a good thing (tm))
485 * - Lookout for a newline */
486 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
487 else {
488 struct qf_vc vc;
489 sz i;
491 vc.self = self;
492 vc.buf = dat;
493 vc.len = len;
/* Each handler consumes from vc and returns when the state changes or the
 * input is used up; loop until nothing is left */
494 while (vc.len > 0) {
495 switch (self->qf_state) {
496 case _QF_CLEAN:
497 case _QF_PREFIX:
498 i = _qf_state_prefix(&vc);
499 break;
500 default: /* silence cc (`i' unused) */
501 case _QF_DATA:
502 i = _qf_state_data(&vc);
503 break;
505 if (i < 0)
506 goto jerr;
507 rv += i;
510 #endif /* mx_HAVE_FILTER_QUOTE_FOLD */
512 jleave:
513 NYD_OU;
514 return rv;
515 jerr:
516 rv = -1;
517 goto jleave;
521 quoteflt_flush(struct quoteflt *self)
523 sz rv = 0;
524 NYD_IN;
525 UNUSED(self);
527 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
528 if (self->qf_dat.l > 0) {
529 rv = _qf_dump_prefix(self);
530 if (rv >= 0) {
531 uz i = self->qf_dat.l;
532 if (i == fwrite(self->qf_dat.s, 1, i, self->qf_os))
533 rv += i;
534 else
535 rv = -1;
536 self->qf_dat.l = 0;
537 self->qf_brk_isws = FAL0;
538 self->qf_wscnt = self->qf_brkl = self->qf_brkw = 0;
539 self->qf_datw = self->qf_pfix_len + self->qf_currq.l;
542 #endif
543 NYD_OU;
544 return rv;
547 #include "su/code-ou.h"
548 /* s-it-mode */