1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ struct quoteflt: quotation (sub) filter.
3 *@ TODO quotation filter: anticipate in future data: don't break if only WS
4 *@ TODO or a LF escaping \ follows on the line (simply reuse the latter).
6 * Copyright (c) 2012/3 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
7 * SPDX-License-Identifier: ISC
9 * Permission to use, copy, modify, and/or distribute this software for any
10 * purpose with or without fee is hereby granted, provided that the above
11 * copyright notice and this permission notice appear in all copies.
13 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
14 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
15 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
16 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
17 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
18 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
19 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 #define su_FILE filter_quote
25 #ifndef mx_HAVE_AMALGAMATION
31 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
32 # ifdef mx_HAVE_C90AMEND1
37 # include <su/icodec.h>
42 #include "mx/filter-quote.h"
44 #include "su/code-in.h"
46 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
47 CTAV(n_QUOTE_MAX
> 3);
56 struct quoteflt
*self
;
61 /* Print out prefix and current quote */
62 static sz
_qf_dump_prefix(struct quoteflt
*self
);
64 /* Add one data character */
65 static sz
_qf_add_data(struct quoteflt
*self
, wchar_t wc
);
67 /* State machine handlers */
68 static sz
_qf_state_prefix(struct qf_vc
*vc
);
69 static sz
_qf_state_data(struct qf_vc
*vc
);
72 _qf_dump_prefix(struct quoteflt
*self
)
78 if ((i
= self
->qf_pfix_len
) > 0 && i
!= fwrite(self
->qf_pfix
, 1, i
,
83 if ((i
= self
->qf_currq
.l
) > 0 && i
!= fwrite(self
->qf_currq
.s
, 1, i
,
96 _qf_add_data(struct quoteflt
*self
, wchar_t wc
)
105 save_l
= save_w
= 0; /* silence cc */
108 /* <newline> ends state */
113 if (wc
== L
'\r') /* TODO CR should be stripped in lower level!! */
116 /* Unroll <tab> to spaces */
118 save_l
= self
->qf_datw
;
119 save_w
= (save_l
+ n_QUOTE_TAB_SPACES
) & ~(n_QUOTE_TAB_SPACES
- 1);
121 while (save_w
-- > 0) {
122 sz j
= _qf_add_data(self
, L
' ');
132 /* To avoid that the last character visually exceeds *qfold-max*, which may
133 * happen for multi-column characters, use w as an indicator for this and
134 * move that character to the next line */
140 self
->qf_dat
.s
[self
->qf_dat
.l
++] = '?';
141 } else if (self
->qf_datw
> self
->qf_qfold_max
- w
) {
145 l
= wctomb(self
->qf_dat
.s
+ self
->qf_dat
.l
, wc
);
148 self
->qf_datw
+= (u32
)w
;
149 self
->qf_dat
.l
+= (uz
)l
;
152 if (self
->qf_datw
>= self
->qf_qfold_max
) {
153 /* If we have seen a nice breakpoint during traversal, shuffle data
154 * around a bit so as to restore the trailing part after flushing */
156 if (self
->qf_brkl
> 0) {
157 save_w
= self
->qf_datw
- self
->qf_brkw
;
158 save_l
= self
->qf_dat
.l
- self
->qf_brkl
;
159 save_b
= self
->qf_dat
.s
+ self
->qf_brkl
+ 2;
160 su_mem_move(save_b
, save_b
- 2, save_l
);
161 self
->qf_dat
.l
= self
->qf_brkl
;
164 self
->qf_dat
.s
[self
->qf_dat
.l
++] = '\\';
166 self
->qf_dat
.s
[self
->qf_dat
.l
++] = '\n';
167 rv
= quoteflt_flush(self
);
169 /* Restore takeovers, if any */
170 if (save_b
!= NULL
) {
171 self
->qf_brk_isws
= FAL0
;
172 self
->qf_datw
+= save_w
;
173 self
->qf_dat
.l
= save_l
;
174 su_mem_move(self
->qf_dat
.s
, save_b
, save_l
);
176 } else if (self
->qf_datw
>= self
->qf_qfold_min
&& !self
->qf_brk_isws
) {
177 boole isws
= (iswspace(wc
) != 0);
179 if (isws
|| !self
->qf_brk_isws
|| self
->qf_brkl
== 0) {
180 if((self
->qf_brk_isws
= isws
) ||
181 self
->qf_brkl
< self
->qf_qfold_maxnws
){
182 self
->qf_brkl
= self
->qf_dat
.l
;
183 self
->qf_brkw
= self
->qf_datw
;
188 /* Did we hold this back to avoid exceeding qf_qfold_max? Then do it now */
189 if(rv
>= 0 && w
== -1){
190 sz j
= _qf_add_data(self
, wc
);
196 /* If state changed to prefix, perform full reset (note this implies that
197 * quoteflt_flush() performs too much work..) */
198 else if (wc
== '\n') {
199 self
->qf_state
= _QF_PREFIX
;
200 self
->qf_wscnt
= self
->qf_datw
= 0;
201 self
->qf_currq
.l
= 0;
209 _qf_state_prefix(struct qf_vc
*vc
)
211 struct quoteflt
*self
;
221 for (buf
= vc
->buf
, len
= vc
->len
; len
> 0;) {
223 i
= mbrtowc(&wc
, buf
, len
, self
->qf_mbps
);
225 /* On hard error, don't modify mbstate_t and step one byte */
226 self
->qf_mbps
[0] = self
->qf_mbps
[1];
232 self
->qf_mbps
[1] = self
->qf_mbps
[0];
234 /* Redundant shift sequence, out of buffer */
247 if (i
== 1 && su_cs_is_ascii(wc
) &&
248 su_cs_find_c(self
->qf_quote_chars
, (char)wc
) != NULL
){
250 if (self
->qf_currq
.l
>= n_QUOTE_MAX
- 3) {
251 self
->qf_currq
.s
[n_QUOTE_MAX
- 3] = '.';
252 self
->qf_currq
.s
[n_QUOTE_MAX
- 2] = '.';
253 self
->qf_currq
.s
[n_QUOTE_MAX
- 1] = '.';
254 self
->qf_currq
.l
= n_QUOTE_MAX
;
256 self
->qf_currq
.s
[self
->qf_currq
.l
++] = buf
[-1];
260 /* The quote is parsed and compressed; dump it */
262 self
->qf_state
= _QF_DATA
;
263 /* Take over WS into the current quote in order to preserve it for
264 * follow-up lines that may become necessary, too */
265 /* TODO we de-facto "normalize" to ASCII SP here which MESSES tabs!! */
266 while (self
->qf_wscnt
-- > 0 && self
->qf_currq
.l
< n_QUOTE_MAX
)
267 self
->qf_currq
.s
[self
->qf_currq
.l
++] = ' ';
268 self
->qf_datw
= self
->qf_pfix_len
+ self
->qf_currq
.l
;
270 rv
= _qf_add_data(self
, wc
);
281 _qf_state_data(struct qf_vc
*vc
)
283 struct quoteflt
*self
;
293 for (buf
= vc
->buf
, len
= vc
->len
; len
> 0;) {
295 i
= mbrtowc(&wc
, buf
, len
, self
->qf_mbps
);
297 /* On hard error, don't modify mbstate_t and step one byte */
298 self
->qf_mbps
[0] = self
->qf_mbps
[1];
303 self
->qf_mbps
[1] = self
->qf_mbps
[0];
305 /* Redundant shift sequence, out of buffer */
312 { sz j
= _qf_add_data(self
, wc
);
320 if (self
->qf_state
!= _QF_DATA
)
329 #endif /* mx_HAVE_FILTER_QUOTE_FOLD */
332 quoteflt_dummy(void) /* TODO LEGACY (until filters are plugged when needed) */
334 static struct quoteflt qf_i
;
336 qf_i
.qf_bypass
= TRU1
;
341 quoteflt_init(struct quoteflt
*self
, char const *prefix
, boole bypass
)
343 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
344 char const *xcp
, *cp
;
348 su_mem_set(self
, 0, sizeof *self
);
350 if ((self
->qf_pfix
= prefix
) != NULL
)
351 self
->qf_pfix_len
= (u32
)su_cs_len(prefix
);
352 self
->qf_bypass
= bypass
;
354 /* Check whether the user wants the more fancy quoting algorithm */
355 /* TODO *quote-fold*: n_QUOTE_MAX may exceed it! */
356 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
357 if (!bypass
&& (cp
= ok_vlook(quote_fold
)) != NULL
) {
358 u32 qmax
, qmaxnws
, qmin
;
360 /* These magic values ensure we don't bail */
361 su_idec_u32_cp(&qmax
, cp
, 10, &xcp
);
362 if (qmax
< self
->qf_pfix_len
+ 6)
363 qmax
= self
->qf_pfix_len
+ 6;
364 qmaxnws
= --qmax
; /* The newline escape */
365 if (cp
== xcp
|| *xcp
== '\0')
366 qmin
= (qmax
>> 1) + (qmax
>> 2) + (qmax
>> 5);
368 su_idec_u32_cp(&qmin
, &xcp
[1], 10, &xcp
);
369 if (qmin
< qmax
>> 1)
371 else if (qmin
> qmax
- 2)
374 if (cp
!= xcp
&& *xcp
!= '\0') {
375 su_idec_u32_cp(&qmaxnws
, &xcp
[1], 10, &xcp
);
376 if (qmaxnws
> qmax
|| qmaxnws
< qmin
)
380 self
->qf_qfold_min
= qmin
;
381 self
->qf_qfold_max
= qmax
;
382 self
->qf_qfold_maxnws
= qmaxnws
;
383 self
->qf_quote_chars
= ok_vlook(quote_chars
);
385 /* Add pad for takeover copies, reverse solidus and newline */
386 self
->qf_dat
.s
= n_autorec_alloc((qmax
+ 3) * n_mb_cur_max
);
387 self
->qf_currq
.s
= n_autorec_alloc((n_QUOTE_MAX
+ 1) * n_mb_cur_max
);
394 quoteflt_destroy(struct quoteflt
*self
) /* xxx inline */
402 quoteflt_reset(struct quoteflt
*self
, FILE *f
) /* xxx inline */
406 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
407 self
->qf_state
= _QF_CLEAN
;
409 self
->qf_currq
.l
= 0;
410 su_mem_set(self
->qf_mbps
, 0, sizeof self
->qf_mbps
);
416 quoteflt_push(struct quoteflt
*self
, char const *dat
, uz len
)
418 /* (xxx Ideally the actual push() [and flush()] would be functions on their
419 * xxx own, via indirect vtbl call ..) */
423 self
->qf_nl_last
= (len
> 0 && dat
[len
- 1] == '\n'); /* TODO HACK */
428 /* Bypass? TODO Finally, this filter simply should not be used, then
429 * (TODO It supersedes prefix_write() or something) */
430 if (self
->qf_bypass
) {
431 if (len
!= fwrite(dat
, 1, len
, self
->qf_os
))
435 /* Normal: place *indentprefix* at every BOL */
437 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
438 if (self
->qf_qfold_max
== 0)
443 boole pxok
= (self
->qf_qfold_min
!= 0);
446 if (!pxok
&& (ll
= self
->qf_pfix_len
) > 0) {
447 if (ll
!= fwrite(self
->qf_pfix
, 1, ll
, self
->qf_os
))
453 /* xxx Strictly speaking this is invalid, because only `/' and `.' are
454 * xxx mandated by POSIX.1-2008 as "invariant across all locales
455 * xxx supported"; though there is no charset known which uses this
456 * xxx control char as part of a multibyte character; note that S-nail
457 * XXX (and the Mail codebase as such) do not support EBCDIC */
458 if ((vp
= su_mem_find(dat
, '\n', len
)) == NULL
)
462 ll
= P2UZ((char*)vp
- dat
) + 1;
465 if (ll
!= fwrite(dat
, sizeof *dat
, ll
, self
->qf_os
))
468 if ((len
-= ll
) == 0)
473 self
->qf_qfold_min
= pxok
;
475 /* Overly complicated, though still only line-per-line: *quote-fold*.
476 * - If .qf_currq.l is 0, then we are in a clean state. Reset .qf_mbps;
477 * TODO note this means we assume that lines start with reset escape seq,
478 * TODO but i don't think this is any worse than what we currently do;
479 * TODO in 15.0, with the value carrier, we should carry conversion states
480 * TODO all along, only resetting on error (or at words for header =???=);
481 * TODO this still is weird for error handling, but we need to act more
482 * TODO stream-alike (though in practice i don't think cross-line states
483 * TODO can be found, because of compatibility reasons; however, being
484 * TODO a problem rather than a solution is not a good thing (tm))
485 * - Look out for a newline */
486 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
495 switch (self
->qf_state
) {
498 i
= _qf_state_prefix(&vc
);
500 default: /* silence cc (`i' unused) */
502 i
= _qf_state_data(&vc
);
510 #endif /* mx_HAVE_FILTER_QUOTE_FOLD */
521 quoteflt_flush(struct quoteflt
*self
)
527 #ifdef mx_HAVE_FILTER_QUOTE_FOLD
528 if (self
->qf_dat
.l
> 0) {
529 rv
= _qf_dump_prefix(self
);
531 uz i
= self
->qf_dat
.l
;
532 if (i
== fwrite(self
->qf_dat
.s
, 1, i
, self
->qf_os
))
537 self
->qf_brk_isws
= FAL0
;
538 self
->qf_wscnt
= self
->qf_brkl
= self
->qf_brkw
= 0;
539 self
->qf_datw
= self
->qf_pfix_len
+ self
->qf_currq
.l
;
547 #include "su/code-ou.h"