better parser for broken quoted encoding (starting dot)
[chiroptera.git] / chibackend / decode.d
blob048de5ebf6670550f9a628d4596fd18edbf49513
1 /* E-Mail Client
2 * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
3 * Understanding is not required. Only obedience.
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3 of the License ONLY.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 module chibackend.decode is aliced;
19 import iv.cmdcon;
20 import iv.dynstring;
21 import iv.encoding;
22 import iv.strex;
23 import iv.utfutil;
24 import iv.utfutil : utf8CodeLen, utf8Valid;
28 // ////////////////////////////////////////////////////////////////////////// //
// returns the total length (in bytes) of the UTF-8 sequence that starts with lead byte `ch`
// legacy 5- and 6-byte forms are recognized too
// returns -1 if `ch` cannot start a sequence (continuation byte, 0xfe or 0xff)
public byte utf8CodeLen (char ch) pure nothrow @trusted @nogc {
  immutable ubyte lead = cast(ubyte)ch;
  if (lead <= 0x7f) return 1; // 0xxx_xxxx: plain ASCII
  if (lead <= 0xbf) return -1; // 10xx_xxxx: continuation byte, not a lead
  if (lead <= 0xdf) return 2; // 110x_xxxx
  if (lead <= 0xef) return 3; // 1110_xxxx
  if (lead <= 0xf7) return 4; // 1111_0xxx
  if (lead <= 0xfb) return 5; // 1111_10xx
  if (lead <= 0xfd) return 6; // 1111_110x
  return -1; // 0xfe and 0xff are never valid
}
43 // ////////////////////////////////////////////////////////////////////////// //
// checks that `buf` is a sequence of structurally well-formed UTF-8 codes:
// every lead byte must be accepted by `utf8CodeLen()`, and must be followed
// by the required number of `10xx_xxxx` continuation bytes
// only the byte structure is checked here, decoded values are not inspected
public bool utf8Valid (const(void)[] buf) pure nothrow @trusted @nogc {
  const(ubyte)[] bytes = cast(const(ubyte)[])buf;
  size_t pos = 0;
  while (pos < bytes.length) {
    // number of continuation bytes that must follow this lead byte
    immutable int extra = utf8CodeLen(bytes.ptr[pos++])-1;
    // bad lead byte, or the sequence runs past the end of the buffer
    if (extra < 0 || extra > bytes.length-pos) return false;
    for (int n = extra; n > 0; --n) {
      if ((bytes.ptr[pos++]&0b1100_0000) != 0b1000_0000) return false;
    }
  }
  return true;
}
58 // ////////////////////////////////////////////////////////////////////////// //
// is `ch` allowed in a nick?
// allowed: ASCII letters and digits, '-', '_', '.', and whatever `isValidCyrillicUni()` accepts
public bool isValidNickUniChar (immutable dchar ch) pure nothrow @safe @nogc {
  pragma(inline, true);
  return
    (ch >= 'a' && ch <= 'z') ||
    (ch >= 'A' && ch <= 'Z') ||
    (ch >= '0' && ch <= '9') ||
    ch == '.' || ch == '_' || ch == '-' ||
    isValidCyrillicUni(ch);
}
// checks if `s` is a valid nick: non-empty, valid UTF-8, and each decoded
// codepoint is accepted by `isValidNickUniChar()`
public bool isValidUTFNick (const(char)[] s) nothrow @safe @nogc {
  if (s.length == 0) return false;
  Utf8DecoderFast dc;
  foreach (immutable char ch; s) {
    dc.decode(cast(ubyte)ch);
    if (dc.invalid) return false;
    if (dc.complete && !isValidNickUniChar(dc.codepoint)) return false;
  }
  // the string must not end in the middle of a multibyte sequence:
  // without this check a truncated trailing codepoint was silently accepted
  return dc.complete;
}
82 // ////////////////////////////////////////////////////////////////////////// //
// the only control chars that are allowed in text: tab (9) and newline (10)
public bool isGoodCtlChar (immutable char ch) pure nothrow @safe @nogc {
  pragma(inline, true);
  return (ch == 10 || ch == 9);
}
89 // ////////////////////////////////////////////////////////////////////////// //
// returns `true` if `buf` contains neither DEL (127) nor any control char
// other than the ones accepted by `isGoodCtlChar()`
public bool isGoodText (const(char)[] buf) pure nothrow @trusted @nogc {
  foreach (immutable char ch; buf) {
    if (ch >= 32) {
      if (ch == 127) return false;
    } else if (!isGoodCtlChar(ch)) {
      return false;
    }
  }
  return true;
}
98 // ////////////////////////////////////////////////////////////////////////// //
// can `ch` be used in a file name?
// rejected: control chars, space, DEL, and both kinds of slashes
// bytes >= 128 are always accepted
private bool isGoodFileNameChar (immutable char ch) pure nothrow @safe @nogc {
  return (ch > 32 && ch != 127 && ch != '/' && ch != '\\');
}
107 // ////////////////////////////////////////////////////////////////////////// //
// converts ASCII letters to lower case, and sanitizes the string:
//   * CR followed by LF is dropped, lone CR becomes LF
//   * other control chars (except the ones `isGoodCtlChar()` accepts) become spaces
//   * DEL (127) becomes '~'
// returns `s` itself if there is nothing to change, otherwise a newly allocated string
public T toLowerStr (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // first pass: is there anything to fix at all?
    bool dirty = false;
    foreach (immutable char ch; s) {
      if ((ch >= 'A' && ch <= 'Z') || ch == 127 || (ch < 32 && !isGoodCtlChar(ch))) {
        dirty = true;
        break;
      }
    }
    if (!dirty) return s;
    // second pass: build the fixed copy
    char[] outbuf;
    outbuf.reserve(s.length);
    foreach (immutable idx, char ch; s) {
      if (ch == 13) {
        // CR of CRLF is dropped; lone CR is converted to LF
        immutable bool lfFollows = (idx+1 < s.length && s.ptr[idx+1] == 10);
        if (!lfFollows) outbuf ~= '\n';
        continue;
      }
      if (ch == 127) ch = '~';
      else if (ch < 32) { if (!isGoodCtlChar(ch)) ch = ' '; }
      else if (ch >= 'A' && ch <= 'Z') ch = ch.tolower;
      outbuf ~= ch;
    }
    return cast(T)outbuf; // it is safe to cast here
  }
}
138 // ////////////////////////////////////////////////////////////////////////// //
// replaces every char rejected by `isGoodFileNameChar()` with '_'
// returns `s` itself if there is nothing to replace, otherwise a newly allocated copy
public T sanitizeFileNameStr (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    foreach (immutable idx, immutable char ch; s) {
      if (isGoodFileNameChar(ch)) continue;
      // first bad char found: copy the string, and patch everything from here on
      char[] fixed = s.dup;
      foreach (ref char c; fixed[idx..$]) {
        if (!isGoodFileNameChar(c)) c = '_';
      }
      return cast(T)fixed; // it is safe to cast here
    }
    return s;
  }
}
160 // ////////////////////////////////////////////////////////////////////////// //
// sanitizes multiline text:
//   * CR followed by LF is dropped, lone CR becomes LF
//   * other control chars (except the ones `isGoodCtlChar()` accepts) become spaces
//   * DEL (127) becomes '~'
// returns `s` itself if `isGoodText(s)` is true, otherwise a newly allocated string
public T sanitizeStr (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (isGoodText(s)) return s;
    char[] outbuf;
    outbuf.reserve(s.length);
    foreach (immutable idx, char ch; s) {
      if (ch == 13) {
        // CR of CRLF is dropped; lone CR is converted to LF
        if (idx+1 >= s.length || s.ptr[idx+1] != 10) outbuf ~= '\n';
        continue;
      }
      if (ch == 127) ch = '~';
      else if (ch < 32 && !isGoodCtlChar(ch)) ch = ' ';
      outbuf ~= ch;
    }
    return cast(T)outbuf; // it is safe to cast here
  }
}
182 // ////////////////////////////////////////////////////////////////////////// //
// sanitizes a single-line string:
//   * control chars and DEL (127) are converted to spaces
//   * runs of blanks are collapsed to one space
//   * leading and trailing blanks are removed
// returns `s` itself if it is already clean, otherwise a newly allocated string
public T sanitizeStrLine (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // a trailing blank must trigger the rewrite too: the scan below only looks
    // backwards, so a single space at the very end was not detected before
    bool found = (s.length != 0 && s.ptr[s.length-1] <= 32);
    if (!found) {
      foreach (immutable idx, char ch; s) {
        if (ch < 32 || ch == 127) { found = true; break; }
        // leading blank, or the second blank in a row
        if (ch == 32 && (idx == 0 || s.ptr[idx-1] <= 32)) { found = true; break; }
      }
    }
    if (!found) {
      return s;
    } else {
      char[] res;
      res.reserve(s.length);
      foreach (char ch; s) {
        if (ch < 32 || ch == 127) ch = ' ';
        // skip leading blanks, and blanks that follow another blank
        if (ch <= 32 && (res.length == 0 || res[$-1] <= 32)) continue;
        res ~= ch;
      }
      // at most one trailing blank can be left here; remove it
      while (res.length && res[$-1] <= 32) res = res[0..$-1];
      return cast(T)res; // it is safe to cast here
    }
  }
}
209 // ////////////////////////////////////////////////////////////////////////// //
// for decoded subject parts
// control chars, DEL (127) and underscores are replaced with spaces
// returns `s` itself if there is nothing to replace, otherwise a newly allocated copy
public T sanitizeStrSubjPart (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    static bool mustBlank (immutable char ch) pure nothrow @safe @nogc {
      return (ch < 32 || ch == 127 || ch == '_');
    }
    foreach (immutable idx, immutable char ch; s) {
      if (!mustBlank(ch)) continue;
      // first bad char found: copy the string, and patch everything from here on
      char[] fixed = s.dup;
      foreach (ref char c; fixed[idx..$]) {
        if (mustBlank(c)) c = ' ';
      }
      return cast(T)fixed; // it is safe to cast here
    }
    return s;
  }
}
231 // ////////////////////////////////////////////////////////////////////////// //
// converts text in unknown 8-bit encoding to UTF-8; this also sanitizes it
//   * pure ASCII text without bad control chars is returned as is
//   * text that is already valid UTF-8 is only passed through `sanitizeStr()`
//   * otherwise each byte >= 128 is treated as a codepoint with the same value,
//     and is re-encoded as UTF-8; ASCII part is sanitized like in `sanitizeStr()`
public T binaryToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    import iv.utfutil : utf8Valid;
    bool suspicious = false;
    foreach (immutable char ch; s) {
      if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { suspicious = true; break; }
    }
    if (!suspicious) return s;
    if (utf8Valid(s)) return sanitizeStr(s);
    char[8] encbuf;
    // estimate the size of the result (used only to preallocate)
    usize total = s.length;
    foreach (immutable char ch; s) {
      if (ch < 128) continue;
      immutable int elen = utf8Encode(encbuf[], cast(dchar)ch);
      assert(elen > 1);
      total += cast(uint)elen;
    }
    char[] outbuf;
    outbuf.reserve(total);
    foreach (immutable idx, char ch; s) {
      if (ch >= 128) {
        // high byte: re-encode as a codepoint
        immutable int elen = utf8Encode(encbuf[], cast(dchar)ch);
        assert(elen > 1);
        outbuf ~= encbuf[0..elen];
        continue;
      }
      if (ch == 13) {
        // CR of CRLF is dropped; lone CR is converted to LF
        if (idx+1 >= s.length || s.ptr[idx+1] != 10) outbuf ~= '\n';
        continue;
      }
      if (ch == 127) ch = '~';
      else if (ch < 32 && !isGoodCtlChar(ch)) ch = ' ';
      outbuf ~= ch;
    }
    return cast(T)outbuf; // it is safe to cast here
  }
}
276 // ////////////////////////////////////////////////////////////////////////// //
// repairs text that is supposed to be UTF-8; this also sanitizes it
//   * pure ASCII text without bad control chars is returned as is
//   * valid UTF-8 is only passed through `sanitizeStr()`
//   * otherwise well-formed multibyte sequences are copied, each byte that cannot
//     start a well-formed sequence is replaced with '?', and ASCII part is sanitized
//     like in `sanitizeStr()`; a sequence cut by the end of the string yields one
//     '?', and stops the processing
public T utf8ToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    import iv.utfutil : utf8Valid;
    bool suspicious = false;
    foreach (immutable char ch; s) {
      if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { suspicious = true; break; }
    }
    if (!suspicious) return s;
    if (utf8Valid(s)) return sanitizeStr(s);
    char[] outbuf;
    outbuf.reserve(s.length);
    int pending = 0; // continuation bytes of an already checked sequence, still to be copied
    foreach (immutable idx, char ch; s) {
      if (pending) { --pending; outbuf ~= ch; continue; }
      if (ch >= 128) {
        immutable byte ulen = utf8CodeLen(ch);
        if (ulen < 1) { outbuf ~= '?'; continue; } // not a lead byte
        if (s.length-idx < ulen) { outbuf ~= '?'; break; } // truncated at the end
        if (!utf8Valid(s[idx..idx+ulen])) { outbuf ~= '?'; continue; } // bad continuation
        outbuf ~= ch;
        pending = ulen-1;
        continue;
      }
      if (ch == 13) {
        // CR of CRLF is dropped; lone CR is converted to LF
        if (idx+1 >= s.length || s.ptr[idx+1] != 10) outbuf ~= '\n';
        continue;
      }
      if (ch == 127) ch = '~';
      else if (ch < 32 && !isGoodCtlChar(ch)) ch = ' ';
      outbuf ~= ch;
    }
    return cast(T)outbuf; // it is safe to cast here
  }
}
317 // ////////////////////////////////////////////////////////////////////////// //
// removes all leading "Re:" prefixes (case-insensitive, blanks are allowed
// before the colon) from the subject; the result is a stripped slice of `s`
public T subjRemoveRe(T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    for (;;) {
      s = s.xstrip;
      if (s.length < 3) return s; // too short to hold "re:"
      immutable bool hasRe =
        (s.ptr[0] == 'r' || s.ptr[0] == 'R') &&
        (s.ptr[1] == 'e' || s.ptr[1] == 'E');
      if (!hasRe) return s;
      // skip blanks between "re" and the colon
      usize colon = 2;
      while (colon < s.length && s.ptr[colon] <= 32) ++colon;
      if (colon >= s.length || s.ptr[colon] != ':') return s;
      s = s[colon+1..$];
    }
  }
}
337 // ////////////////////////////////////////////////////////////////////////// //
private static immutable string b64alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// base64 decoding table: maps input byte either to its 6-bit value, or to a marker:
//   0xff: invalid byte
//   0xfe: padding ('=')
//   0xf0: byte that should be silently skipped
private static immutable ubyte[256] b64dc = () {
  ubyte[256] res = 0xff; // invalid
  foreach (immutable idx, immutable char ch; b64alphabet) {
    res[cast(ubyte)ch] = cast(ubyte)idx;
  }
  res['='] = 0xfe; // padding
  // ignore control chars and space (the space was not skipped before, so any
  // blank in the data made the whole input invalid)
  res[0..33] = 0xf0;
  res[127] = 0xf0; // just in case
  return res;
}();

// decodes base64 data
//   ignoreUnderscore: if `true`, '_' chars in the input are skipped
//   datavoid: encoded data; control chars, spaces and DEL are skipped
//   error: set to `true` if the data cannot be decoded
// returns decoded bytes; on error the text "<invalid base64 data>" is returned instead
// missing padding at the end of the data is tolerated
public char[] decodeBase64(bool ignoreUnderscore=false) (const(void)[] datavoid, out bool error) nothrow @trusted {
  const(ubyte)[] data = cast(const(ubyte)[])datavoid;

  bool inPadding = false;
  ubyte[4] bts = void; // collected 6-bit values of the current quad
  uint btspos = 0; // number of collected values (padding chars are counted too)

  char[] dcx;
  dcx.reserve((data.length+3U)/4U*3U+8U);
  error = false;

  // emits bytes for the values collected so far; fails if there is only one value
  bool decodeChunk () nothrow @trusted {
    if (btspos == 0) return true;
    if (btspos == 1) return false; //throw new Base64Exception("incomplete data in base64 decoder");
    dcx ~= cast(char)((bts.ptr[0]<<2)|((bts.ptr[1]&0x30)>>4)); // 2 and more
    if (btspos > 2) dcx ~= cast(char)(((bts.ptr[1]&0x0f)<<4)|((bts.ptr[2]&0x3c)>>2)); // 3 and more
    if (btspos > 3) dcx ~= cast(char)(((bts.ptr[2]&0x03)<<6)|bts.ptr[3]);
    return true;
  }

  while (data.length) {
    immutable ubyte rawch = data.ptr[0];
    static if (ignoreUnderscore) {
      // this must test the raw input byte: the table value for '_' is "invalid",
      // so checking the looked-up value (as it was done before) never matched,
      // and underscores were always reported as errors
      if (rawch == '_') { data = data[1..$]; continue; }
    }
    immutable ubyte cb = b64dc.ptr[rawch];
    if (cb == 0xff) { error = true; delete dcx; return "<invalid base64 data>".dup; }
    data = data[1..$];
    if (cb == 0xf0) continue; // empty
    if (cb == 0xfe) {
      // padding
      if (!inPadding) {
        if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
        inPadding = true;
      }
      if (++btspos == 4) { inPadding = false; btspos = 0; }
    } else {
      // normal
      if (inPadding) {
        // data char in the middle of a padded quad
        if (btspos != 0) { error = true; delete dcx; return "<invalid base64 data>".dup; }
        inPadding = false;
      }
      bts.ptr[btspos++] = cb;
      if (btspos == 4) {
        if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
        btspos = 0;
      }
    }
  }

  if (btspos != 0 && !inPadding) {
    // assume that it is not padded
    if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
  }

  return dcx;
}
409 // ////////////////////////////////////////////////////////////////////////// //
// decodes quoted-printable data
//   * "=XY" (two hex digits) is replaced with the corresponding byte
//   * lone '=' at the very end of the data is dropped
//   * any other '=' is copied as is, except in multiline mode (see below)
// in multiline mode (`multiline` is `true`):
//   * '=' followed only by blanks up to the end of the line (or the end of the
//     data) is a line continuation: '=', the blanks and the newline are removed
//   * a line that starts with two dots loses the first dot
// returns newly allocated buffer
public char[] decodeQuotedPrintable(bool multiline) (const(void)[] datavoid) nothrow @trusted {
  const(char)[] src = cast(const(char)[])datavoid;
  char[] outbuf;
  outbuf.reserve(src.length);
  while (src.length) {
    if (src.ptr[0] != '=') {
      static if (multiline) {
        // dot-stuffed line: emit the newline here, and skip the first dot;
        // the second dot is emitted by the common code below
        if (src.length >= 3 &&
            (src.ptr[0] == '\n' || src.ptr[0] == '\r') &&
            src.ptr[1] == '.' && src.ptr[2] == '.')
        {
          outbuf ~= src.ptr[0];
          src = src[2..$];
        }
      }
    } else {
      if (src.length == 1) break; // lone '=' at the end
      if (src.length >= 3) {
        immutable hi = digitInBase(src.ptr[1], 16);
        if (hi >= 0) {
          immutable lo = digitInBase(src.ptr[2], 16);
          if (lo >= 0) {
            outbuf ~= cast(char)(hi*16+lo);
            src = src[3..$];
            continue;
          }
        }
      }
      // check if it is followed by blanks up to the newline
      // if it is so, then this is "line continuation" -- remove both '=' and blanks
      static if (multiline) {
        bool softBreak = false;
        usize pos = 1; // skip '='
        while (pos < src.length) {
          char ch = src.ptr[pos++];
          if (ch == 9 || ch == 32) continue;
          if (ch == 13) {
            if (pos >= src.length) { softBreak = true; break; }
            if (src.ptr[pos] == 10) continue; // CRLF: LF will be processed on the next step
            ch = 10; // lone CR is a newline too
          }
          if (ch != 10) {
            // something that is not a blank: this is not a continuation
            --pos;
            break;
          }
          // the next line starts with two dots: skip the first one
          if (pos+1 < src.length && src.ptr[pos] == '.' && src.ptr[pos+1] == '.') ++pos;
          softBreak = true;
          break;
        }
        if (pos > src.length) pos = src.length; // just in case
        if (softBreak || pos >= src.length) {
          src = src[pos..$];
          continue;
        }
      }
    }
    // copy the current char as is
    outbuf ~= src.ptr[0];
    src = src[1..$];
  }
  return outbuf;
}
474 // ////////////////////////////////////////////////////////////////////////// //
// clears the high bit of every byte
// returns `s` itself if it has no bytes >= 128, otherwise a newly allocated copy
public T ensureProper7Bit(T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    foreach (immutable idx, immutable char ch; s) {
      if (ch < 128) continue;
      // first high byte found: copy the string, and mask everything from here on
      char[] fixed = s.dup;
      foreach (ref char c; fixed[idx..$]) c &= 0x7f;
      return cast(T)fixed; // it is safe to cast here
    }
    return s;
  }
}
490 // ////////////////////////////////////////////////////////////////////////// //
// decode things like "=?UTF-8?B?Tm9yZGzDtnc=?="
// the string can contain several encoded parts mixed with plain text
// supported part encodings are 'Q' (quoted printable) and 'B' (base64); parts in
// charsets other than utf-8/us-ascii are recoded to utf-8
// if anything is malformed or cannot be decoded, the whole original string is
// returned (sanitized); the result is always a sanitized single line
public T decodeSubj(T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (s.indexOf("=?") < 0) return s.sanitizeStrLine.utf8ToUtf8;

    // have to do some work
    auto origs = s;
    char[] res;
    res.reserve(s.length); // at least

    while (s.length > 2) {
      auto stqpos = s.indexOf("=?");
      if (stqpos < 0) break;
      // plain text before the encoded part
      if (stqpos > 0) res ~= s[0..stqpos].utf8ToUtf8;
      s = s[stqpos+2..$];

      // charset name
      auto eepos = s.indexOf('?');
      // no charset terminator: this is not an encoded part; use the same fallback
      // as for the other malformed cases (the "=?" marker was silently eaten before)
      if (eepos < 0) return origs.sanitizeStrLine.utf8ToUtf8;
      auto enc = s[0..eepos];

      //conwriteln("ENCODING: '", enc, "'");
      s = s[eepos+1..$];
      if (enc.length == 0) enc = "utf-8";
      // encoding type is one char, followed by '?'
      if (s.length < 2 || s.ptr[1] != '?') return origs.sanitizeStrLine.utf8ToUtf8;

      char ect = s.ptr[0];
      s = s[2..$];
      eepos = s.indexOf("?=");
      if (eepos < 0) return origs.sanitizeStrLine.utf8ToUtf8;

      auto part = s[0..eepos];
      s = s[eepos+2..$];

      // several encoded parts may be separated with spaces; those spaces should be ignored
      stqpos = 0;
      while (stqpos < s.length && s.ptr[stqpos] <= ' ') ++stqpos;
      if (s.length-stqpos >= 2 && s.ptr[stqpos] == '=' && s.ptr[stqpos+1] == '?') s = s[stqpos..$];

      // decode part
      if (ect == 'Q' || ect == 'q') {
        // quoted printable
        part = cast(T)decodeQuotedPrintable!false(part); // it is safe to cast here
      } else if (ect == 'B' || ect == 'b') {
        // base64
        //auto xpart = part;
        bool error = false;
        part = cast(T)decodeBase64!true(part, out error); // it is safe to cast here
        if (error) {
          //conwriteln("CANNOT DECODE B64: ", xpart);
          delete part;
          return origs.sanitizeStrLine.utf8ToUtf8;
        }
      }

      // reencode part if necessary
      if (!enc.strEquCI("utf-8") && !enc.strEquCI("utf8") && !enc.strEquCI("US-ASCII")) {
        try {
          //conwriteln("RECODING: ", enc);
          part = recode(part, "utf-8", enc);
        } catch (Exception e) {
          //conwriteln("RECODE ERROR: ", e.msg);
          return origs.sanitizeStrLine.utf8ToUtf8;
        }
      }

      part = part.sanitizeStrSubjPart.utf8ToUtf8;
      if (part.length) res ~= part;
    }

    // plain text after the last encoded part
    if (s.length) res ~= s.utf8ToUtf8;
    return cast(T)res.sanitizeStrLine; // it should be valid utf8 here; also, it is safe to cast here
  }
}
568 // ////////////////////////////////////////////////////////////////////////// //
// decode content with the given encoding type
//   * empty data, empty encoding, "8bit" and "binary": data is returned as is
//   * "7bit": high bits are cleared (see `ensureProper7Bit()`)
//   * "base64": data is decoded; the decoder error flag is not reported to the caller
//   * "quoted-printable": data is decoded in multiline mode
//   * anything else: the text "<invalid encoding:NAME>" is returned
// encoding names are compared case-insensitively
public T decodeContent(T:const(char)[]) (T data, const(char)[] encoding) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (data.length == 0 || encoding.length == 0) return data;
    if (encoding.strEquCI("8bit") || encoding.strEquCI("binary")) return data;

    if (encoding.strEquCI("7bit")) {
      return cast(T)ensureProper7Bit(data); // it is safe to cast here
    }

    if (encoding.strEquCI("base64")) {
      bool error;
      return cast(T)decodeBase64(data, out error); // it is safe to cast here
    }

    if (encoding.strEquCI("quoted-printable")) {
      return cast(T)decodeQuotedPrintable!true(data); // it is safe to cast here
    }

    // unknown encoding; the encoding name cannot be empty here, so there is
    // no need to check its length again
    char[] res = "<invalid encoding:".dup;
    res ~= encoding;
    res ~= ">";
    return cast(T)res; // it is safe to cast here
  }
}
603 // ////////////////////////////////////////////////////////////////////////// //
// converts `data` from the given charset to sanitized UTF-8
//   * data without bytes >= 128 is only passed through `sanitizeStr()`
//   * empty charset, "utf-8", "utf8" and "US-ASCII" mean that the data is repaired with `utf8ToUtf8()`
//   * otherwise the data is recoded; if the recoder throws, the text
//     "<cannot decode 'CHARSET'>" is returned (blanks and non-ASCII are removed from the name)
public T recodeToUtf8(T:const(char)[]) (T data, const(char)[] charset) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (data.length == 0) return data;
    bool hasHigh = false;
    foreach (immutable char ch; data) {
      if (ch >= 128) { hasHigh = true; break; }
    }
    if (!hasHigh) return sanitizeStr(data);
    immutable bool alreadyUtf =
      charset.length == 0 ||
      charset.strEquCI("utf-8") ||
      charset.strEquCI("utf8") ||
      charset.strEquCI("US-ASCII");
    if (alreadyUtf) return utf8ToUtf8(data);
    try {
      data = recode(data, "utf-8", charset);
      return (data.length == 0 ? data : data.sanitizeStr);
    } catch (Exception e) {
      // recoding failed: fall through to build the error text
    }
    char[] msg = "<cannot decode '".dup;
    foreach (immutable char ch; charset) {
      if (ch > 32 && ch < 127) msg ~= ch;
    }
    msg ~= "'>";
    return cast(T)msg; // it is safe to cast here
  }
}
631 // ////////////////////////////////////////////////////////////////////////// //
// strips `buf`, and removes one level of surrounding quotes:
// "...", <...>, `...' or '...'; the unquoted text is stripped too
private T mailNameUnquote (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    buf = buf.xstrip;
    if (buf.length < 2) return buf;
    // find out which closing char matches the opening one
    char closer = 0; // 0: not an opening quote
    switch (buf.ptr[0]) {
      case '"': closer = '"'; break;
      case '<': closer = '>'; break;
      case '`': closer = '\''; break;
      case '\'': closer = '\''; break;
      default: break;
    }
    if (closer != 0 && buf[$-1] == closer) buf = buf[1..$-1].xstrip;
    return buf;
  }
}
651 // ////////////////////////////////////////////////////////////////////////// //
// extract email from decoded "From" and "To" fields
// if the field ends with '>', the text between the last '<' and that '>' is taken
// (everything up to '>' if there is no '<'); otherwise the whole stripped field is taken
// if `doSanitize` is set, the result is cut at the first space
// the result is always passed through `toLowerStr()`
public T extractMail(bool doSanitize=true, T:const(char)[]) (T data) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (data.length == 0) return data;
    if (data[$-1] != '>') {
      data = data.xstrip;
    } else {
      // walk back to the char right after the last '<'
      usize start = data.length;
      while (start > 0 && data.ptr[start-1] != '<') --start;
      data = data[start..$-1].xstrip;
    }
    static if (doSanitize) {
      // hack for idiotic LJ (those morons are breaking all possible standards)
      auto sppos = data.indexOf(' ');
      if (sppos > 0) data = data[0..sppos];
    }
    return data.toLowerStr;
  }
}
675 // ////////////////////////////////////////////////////////////////////////// //
// strip email from decoded "From" and "To" fields
// returns the stripped text before the last '<' when the field ends with '>'
// returns empty slice if the field does not end with '>', or has no '<'
public T stripMail(T:const(char)[]) (T data) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (data.length == 0) return data;
    if (data[$-1] != '>') return data[0..0];
    // walk back to the char right after the last '<'
    usize pos = data.length;
    while (pos > 0 && data.ptr[pos-1] != '<') --pos;
    if (pos == 0) return data[0..0]; // no '<' at all
    return data[0..pos-1].xstrip;
  }
}
693 // ////////////////////////////////////////////////////////////////////////// //
// extract name from decoded "From" and "To" fields
// can construct name if there is none
// special hack for idiotic LJ
// the name is the decoded and unquoted text before "<mail>"; if there is none, the
// name is built from the local part of the mail: '.', '-', '_' and blanks separate
// the words, and each word is capitalized
public T extractName(T:const(char)[]) (T data) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (data.length == 0) return data;
    auto origData = data;
    T mail = extractMail(data);
    data = stripMail(data).decodeSubj.xstrip;
    // hack for idiotic LJ (those morons are breaking all possible standards)
    if (mail.startsWith("lj_dontreply@lj.rossia.org")) {
      // the name is in the parens after the mail
      auto dd = extractMail!false(origData);
      auto spos = dd.indexOf(" (");
      if (spos >= 0) {
        dd = dd[spos+2..$-(dd[$-1] == ')' ? 1 : 0)].xstrip;
        // `extractMail()` always returns lowercased text, so the markers must be
        // lowercase too (mixed-case markers used before could never match)
        if (dd == "ljr comment") {
          dd = "anonymous";
        } else if (dd.endsWith(" - ljr comment")) {
          auto dpos = dd.lastIndexOf('-');
          dd = dd[0..dpos].xstrip;
          if (dd.length == 0) dd = "anonymous";
        }
        dd = dd.mailNameUnquote;
        if (dd.length) return dd;
      }
    }
    data = data.mailNameUnquote;
    if (data.length) {
      if (mail.startsWith("lj-notify@livejournal.com")) {
        if (data == "LJ Comment") {
          data = "anonymous";
        } else if (data.endsWith(" - LJ Comment")) {
          auto dpos = data.lastIndexOf('-');
          data = data[0..dpos].xstrip;
          if (data.length == 0) data = "anonymous";
        }
      }
      return data;
    }
    // construct name from the mail
    auto npos = mail.indexOf('@');
    if (npos <= 0) return mail;
    data = mail[0..npos].xstrip;
    if (data.length == 0) return mail;
    char[] res;
    res.reserve(data.length);
    foreach (char ch; data) {
      // word separators
      if (ch <= 32 || ch == '.' || ch == '-' || ch == '_') ch = 32;
      if (ch == 32) {
        // no leading spaces, no double spaces
        if (res.length && res[$-1] != 32) res ~= ch;
      } else {
        // capitalize the first letter of each word
        if (res.length == 0 || res[$-1] == 32) ch = ch.toupper; else ch = ch.tolower;
        res ~= ch;
      }
    }
    res = res.xstrip;
    if (res.length == 0) return mail;
    return cast(T)res; // it is safe to cast here
  }
}
758 // ////////////////////////////////////////////////////////////////////////// //
// encode string if it contains some non-ascii
// always returns new string, which is safe to `delete`
// passed string must be in UTF-8
// can return `null` for empty string
// the string is encoded (as "=?UTF-8?Q?...?=") if it starts with '=' or '?', or if
// it contains any control char, DEL, non-ASCII byte, quote, backslash or '@'
// when encoding, spaces and control chars are written as '_'
public dynstring strEncodeQ (const(char)[] s) nothrow @trusted {
  static bool isSpecial (immutable char ch) pure nothrow @safe @nogc {
    return
      ch < ' ' ||
      ch >= 127 ||
      ch == '\'' ||
      ch == '`' ||
      ch == '"' ||
      ch == '\\' ||
      ch == '@';
  }

  dynstring res;
  if (s.length == 0) return res;
  // the standard requires uppercase hex digits in quoted printable
  static immutable string hexd = "0123456789ABCDEF";
  bool needWork = (s[0] == '=' || s[0] == '?');
  if (!needWork) foreach (char ch; s) if (isSpecial(ch)) { needWork = true; break; }
  if (!needWork) {
    res = s;
  } else {
    res.reserve(s.length*3+32);
    res ~= "=?UTF-8?Q?"; // quoted printable
    foreach (char ch; s) {
      // blanks (and control chars) are represented with underscores
      immutable bool blank = (ch <= ' ');
      if (blank) ch = '_';
      // real underscore must be escaped, otherwise it will be decoded as space
      if (blank || (!isSpecial(ch) && ch != '=' && ch != '?' && ch != '_')) {
        res ~= ch;
      } else {
        res ~= "=";
        res ~= hexd[(cast(ubyte)ch)>>4];
        res ~= hexd[(cast(ubyte)ch)&0x0f];
      }
    }
    res ~= "?=";
  }
  return res;
}