chibackend/decode.d

   1 /* E-Mail Client
   2  * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
   3  * Understanding is not required. Only obedience.
   4  *
   5  * This program is free software: you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation, version 3 of the License ONLY.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16  */
  17 module chibackend.decode is aliced;
  18
  19 import iv.cmdcon;
  20 import iv.dynstring;
  21 import iv.encoding;
  22 import iv.strex;
  23 import iv.utfutil;
  24 import iv.utfutil : utf8CodeLen, utf8Valid;
  25
  26
  27 /*
  28 // ////////////////////////////////////////////////////////////////////////// //
  29 // `ch`: utf8 start
  30 // -1: invalid utf8
  31 public byte utf8CodeLen (char ch) pure nothrow @trusted @nogc {
  32   //pragma(inline, true);
  33   if (ch < 0x80) return 1;
  34   if ((ch&0b1111_1110) == 0b1111_1100) return 6;
  35   if ((ch&0b1111_1100) == 0b1111_1000) return 5;
  36   if ((ch&0b1111_1000) == 0b1111_0000) return 4;
  37   if ((ch&0b1111_0000) == 0b1110_0000) return 3;
  38   if ((ch&0b1110_0000) == 0b1100_0000) return 2;
  39   return -1; // invalid
  40 }
  41
  42
  43 // ////////////////////////////////////////////////////////////////////////// //
  44 public bool utf8Valid (const(void)[] buf) pure nothrow @trusted @nogc {
  45   const(ubyte)* bp = cast(const(ubyte)*)buf.ptr;
  46   auto left = buf.length;
  47   while (left--) {
  48     auto len = utf8CodeLen(*bp++)-1;
  49     if (len < 0 || len > left) return false;
  50     left -= len;
  51     while (len-- > 0) if (((*bp++)&0b1100_0000) != 0b1000_0000) return false;
  52   }
  53   return true;
  54 }
  55 */
  56
  57
  58 // ////////////////////////////////////////////////////////////////////////// //
  59 public bool isValidNickUniChar (immutable dchar ch) pure nothrow @safe @nogc {
  60   pragma(inline, true);
  61   return
  62     (ch >= '0' && ch <= '9') ||
  63     (ch >= 'A' && ch <= 'Z') ||
  64     (ch >= 'a' && ch <= 'z') ||
  65     ch == '-' || ch == '_' || ch == '.' ||
  66     isValidCyrillicUni(ch);
  67 }
  68
  69
  70 public bool isValidUTFNick (const(char)[] s) nothrow @safe @nogc {
  71   if (s.length == 0) return false;
  72   Utf8DecoderFast dc;
  73   foreach (immutable char ch; s) {
  74     dc.decode(cast(ubyte)ch);
  75     if (dc.invalid) return false;
  76     if (dc.complete && !isValidNickUniChar(dc.codepoint)) return false;
  77   }
  78   return true;
  79 }
  80
  81
  82 // ////////////////////////////////////////////////////////////////////////// //
  83 public bool isGoodCtlChar (immutable char ch) pure nothrow @safe @nogc {
  84   pragma(inline, true);
  85   return (ch == '\t' || ch == '\n');
  86 }
  87
  88
  89 // ////////////////////////////////////////////////////////////////////////// //
  90 public bool isGoodText (const(char)[] buf) pure nothrow @trusted @nogc {
  91   foreach (immutable char ch; buf) {
  92     if (ch == 127 || (ch < 32 && !isGoodCtlChar(ch))) return false;
  93   }
  94   return true;
  95 }
  96
  97
  98 // ////////////////////////////////////////////////////////////////////////// //
  99 private bool isGoodFileNameChar (immutable char ch) pure nothrow @safe @nogc {
 100   if (ch <= 32 || ch == 127) return false;
 101   if (ch >= 128) return true;
 102   if (ch == '/' || ch == '\\') return false;
 103   return true;
 104 }
 105
 106
 107 // ////////////////////////////////////////////////////////////////////////// //
 108 // this also sanitizes it
 109 public T toLowerStr (T:const(char)[]) (T s) nothrow @trusted {
 110   static if (is(T == typeof(null))) {
 111     return null;
 112   } else {
 113     bool needwork = false;
 114     foreach (immutable char ch; s) {
 115       if (ch == 127 || (ch < 32 && !isGoodCtlChar(ch)) || (ch >= 'A' && ch <= 'Z')) {
 116         needwork = true;
 117         break;
 118       }
 119     }
 120     if (!needwork) {
 121       return s;
 122     } else {
 123       char[] res;
 124       res.reserve(s.length);
 125       foreach (immutable idx, char ch; s) {
 126              if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 127         else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 128         else if (ch == 127) res ~= '~';
 129         else if (ch >= 'A' && ch <= 'Z') res ~= ch.tolower;
 130         else res ~= ch;
 131       }
 132       return cast(T)res; // it is safe to cast here
 133     }
 134   }
 135 }
 136
 137
 138 // ////////////////////////////////////////////////////////////////////////// //
 139 // this also sanitizes it
 140 public T sanitizeFileNameStr (T:const(char)[]) (T s) nothrow @trusted {
 141   static if (is(T == typeof(null))) {
 142     return null;
 143   } else {
 144     bool needwork = false;
 145     foreach (immutable char ch; s) if (!isGoodFileNameChar(ch)) { needwork = true; break; }
 146     if (!needwork) {
 147       return s;
 148     } else {
 149       char[] res = new char[s.length];
 150       res[] = s[];
 151       foreach (ref char ch; res) {
 152         if (!isGoodFileNameChar(ch)) ch = '_';
 153       }
 154       return cast(T)res; // it is safe to cast here
 155     }
 156   }
 157 }
 158
 159
 160 // ////////////////////////////////////////////////////////////////////////// //
 161 public T sanitizeStr (T:const(char)[]) (T s) nothrow @trusted {
 162   static if (is(T == typeof(null))) {
 163     return null;
 164   } else {
 165     if (isGoodText(s)) {
 166       return s;
 167     } else {
 168       char[] res;
 169       res.reserve(s.length);
 170       foreach (immutable idx, char ch; s) {
 171              if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 172         else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 173         else if (ch == 127) res ~= '~';
 174         else res ~= ch;
 175       }
 176       return cast(T)res; // it is safe to cast here
 177     }
 178   }
 179 }
 180
 181
 182 // ////////////////////////////////////////////////////////////////////////// //
 183 public T sanitizeStrLine (T:const(char)[]) (T s) nothrow @trusted {
 184   static if (is(T == typeof(null))) {
 185     return null;
 186   } else {
 187     bool found = false;
 188     foreach (immutable idx, char ch; s) {
 189       if (ch < 32 || ch == 127) { found = true; break; }
 190       if (ch == 32 && (idx == 0 || s.ptr[idx-1] <= 32)) { found = true; break; }
 191     }
 192     if (!found) {
 193       return s;
 194     } else {
 195       char[] res;
 196       res.reserve(s.length);
 197       foreach (char ch; s) {
 198         if (ch < 32 || ch == 127) ch = ' ';
 199         if (ch <= 32 && (res.length == 0 || res[$-1] <= 32)) continue;
 200         res ~= ch;
 201       }
 202       while (res.length && res[$-1] <= 32) res = res[0..$-1];
 203       return cast(T)res; // it is safe to cast here
 204     }
 205   }
 206 }
 207
 208
 209 // ////////////////////////////////////////////////////////////////////////// //
 210 // for decoded subject parts
 211 public T sanitizeStrSubjPart (T:const(char)[]) (T s) nothrow @trusted {
 212   static if (is(T == typeof(null))) {
 213     return null;
 214   } else {
 215     bool found = false;
 216     foreach (immutable idx, immutable char ch; s) {
 217       if (ch < 32 || ch == 127 || ch == '_') { found = true; break; }
 218     }
 219     if (!found) {
 220       return s;
 221     } else {
 222       char[] res = new char[s.length];
 223       res[] = s[];
 224       foreach (ref char ch; res) if (ch < 32 || ch == 127 || ch == '_') ch = ' ';
 225       return cast(T)res; // it is safe to cast here
 226     }
 227   }
 228 }
 229
 230
 231 // ////////////////////////////////////////////////////////////////////////// //
 232 // this also sanitizes it
 233 public T binaryToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
 234   static if (is(T == typeof(null))) {
 235     return null;
 236   } else {
 237     bool found = false;
 238     foreach (immutable char ch; s) {
 239       if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { found = true; break; }
 240     }
 241     if (!found) {
 242       return s;
 243     } else {
 244       import iv.utfutil : utf8Valid;
 245       if (utf8Valid(s)) return sanitizeStr(s);
 246       char[8] uc;
 247       char[] res;
 248       // calc length
 249       usize sz = s.length;
 250       foreach (immutable char ch; s) {
 251         if (ch >= 128) {
 252           immutable int len = utf8Encode(uc[], cast(dchar)ch);
 253           assert(len > 1);
 254           sz += cast(uint)len;
 255         }
 256       }
 257       res.reserve(sz);
 258       foreach (immutable idx, char ch; s) {
 259         if (ch < 128) {
 260                if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 261           else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 262           else if (ch == 127) res ~= '~';
 263           else res ~= ch;
 264         } else {
 265           immutable int len = utf8Encode(uc[], cast(dchar)ch);
 266           assert(len > 1);
 267           res ~= uc[0..len];
 268         }
 269       }
 270       return cast(T)res; // it is safe to cast here
 271     }
 272   }
 273 }
 274
 275
 276 // ////////////////////////////////////////////////////////////////////////// //
 277 // this also sanitizes it
 278 public T utf8ToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
 279   static if (is(T == typeof(null))) {
 280     return null;
 281   } else {
 282     bool found = false;
 283     foreach (immutable char ch; s) {
 284       if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { found = true; break; }
 285     }
 286     if (!found) {
 287       return s;
 288     } else {
 289       import iv.utfutil : utf8Valid;
 290       if (utf8Valid(s)) return sanitizeStr(s);
 291       char[8] uc;
 292       char[] res;
 293       res.reserve(s.length);
 294       int utfleft = 0;
 295       foreach (immutable idx, char ch; s) {
 296         if (utfleft) { --utfleft; res ~= ch; continue; }
 297         if (ch < 128) {
 298                if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 299           else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 300           else if (ch == 127) res ~= '~';
 301           else res ~= ch;
 302         } else {
 303           immutable byte ulen = utf8CodeLen(ch);
 304           if (ulen < 1) { res ~= '?'; continue; }
 305           if (s.length-idx < ulen) { res ~= '?'; break; }
 306           if (!utf8Valid(s[idx..idx+ulen])) { res ~= '?'; continue; }
 307           res ~= ch;
 308           utfleft = ulen-1;
 309         }
 310       }
 311       return cast(T)res; // it is safe to cast here
 312     }
 313   }
 314 }
 315
 316
 317 // ////////////////////////////////////////////////////////////////////////// //
 318 public T subjRemoveRe(T:const(char)[]) (T s) nothrow @trusted {
 319   static if (is(T == typeof(null))) {
 320     return null;
 321   } else {
 322     for (;;) {
 323       s = s.xstrip;
 324       if (s.length < 3) break;
 325       if (s.ptr[0] != 'r' && s.ptr[0] != 'R') break;
 326       if (s.ptr[1] != 'e' && s.ptr[1] != 'E') break;
 327       usize pp = 2;
 328       while (pp < s.length && s.ptr[pp] <= 32) ++pp;
 329       if (pp >= s.length || s.ptr[pp] != ':') break;
 330       s = s[pp+1..$];
 331     }
 332     return s;
 333   }
 334 }
 335
 336
 337 // ////////////////////////////////////////////////////////////////////////// //
 338 private static immutable string b64alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 339
 340 private static immutable ubyte[256] b64dc = () {
 341   ubyte[256] res = 0xff; // invalid
 342   foreach (immutable idx, immutable char ch; b64alphabet) {
 343     res[cast(ubyte)ch] = cast(ubyte)idx;
 344   }
 345   res['='] = 0xfe; // padding
 346   // ignore
 347   res[0..32] = 0xf0;
 348   res[127] = 0xf0; // just in case
 349   return res;
 350 }();
 351
 352 public char[] decodeBase64(bool ignoreUnderscore=false) (const(void)[] datavoid, out bool error) nothrow @trusted {
 353   const(ubyte)[] data = cast(const(ubyte)[])datavoid;
 354
 355   bool inPadding = false;
 356   ubyte[4] bts = void;
 357   uint btspos = 0;
 358
 359   char[] dcx;
 360   dcx.reserve((data.length+3U)/4U*3U+8U);
 361   error = false;
 362
 363   bool decodeChunk () nothrow @trusted {
 364     if (btspos == 0) return true;
 365     if (btspos == 1) return false; //throw new Base64Exception("incomplete data in base64 decoder");
 366     dcx ~= cast(char)((bts.ptr[0]<<2)|((bts.ptr[1]&0x30)>>4)); // 2 and more
 367     if (btspos > 2) dcx ~= cast(char)(((bts.ptr[1]&0x0f)<<4)|((bts.ptr[2]&0x3c)>>2)); // 3 and more
 368     if (btspos > 3) dcx ~= cast(char)(((bts.ptr[2]&0x03)<<6)|bts.ptr[3]);
 369     return true;
 370   }
 371
 372   while (data.length) {
 373     immutable ubyte cb = b64dc.ptr[data.ptr[0]];
 374     if (cb == 0xff) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 375     data = data[1..$];
 376     if (cb == 0xf0) continue; // empty
 377     static if (ignoreUnderscore) {
 378       if (cb == '_') continue;
 379     }
 380     if (cb == 0xfe) {
 381       // padding
 382       if (!inPadding) {
 383         if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 384         inPadding = true;
 385       }
 386       if (++btspos == 4) { inPadding = false; btspos = 0; }
 387     } else {
 388       // normal
 389       if (inPadding) {
 390         if (btspos != 0) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 391         inPadding = false;
 392       }
 393       bts.ptr[btspos++] = cb;
 394       if (btspos == 4) {
 395         if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 396         btspos = 0;
 397       }
 398     }
 399   }
 400   if (btspos != 0 && !inPadding) {
 401     // assume that it is not padded
 402     if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 403   }
 404
 405   return dcx;
 406 }
 407
 408
 409 // ////////////////////////////////////////////////////////////////////////// //
 410 public char[] decodeQuotedPrintable(bool multiline) (const(void)[] datavoid) nothrow @trusted {
 411   const(char)[] data = cast(const(char)[])datavoid;
 412   //{ import core.stdc.stdio; fprintf(stderr, "***<%.*s>\n", cast(uint)data.length, data.ptr); }
 413   char[] dcx;
 414   dcx.reserve(data.length);
 415   while (data.length) {
 416     if (data.ptr[0] == '=') {
 417       if (data.length == 1) break;
 418       if (data.length >= 3 && digitInBase(data.ptr[1], 16) >= 0 && digitInBase(data.ptr[2], 16) >= 0) {
 419         dcx ~= cast(char)(digitInBase(data.ptr[1], 16)*16+digitInBase(data.ptr[2], 16));
 420         data = data[3..$];
 421         continue;
 422       }
 423       // check if it is followed by blanks up to the newline
 424       // if it is so, then this is "line continuation" -- remove both '=' and blanks
 425       static if (multiline) {
 426         bool ateol = false;
 427         usize epos = 1; // skip '='
 428         while (epos < data.length) {
 429           char ch = data.ptr[epos++];
 430           if (ch == 9 || ch == 32) continue;
 431           if (ch == 13) {
 432             if (epos >= data.length) { ateol = true; break; }
 433             if (data.ptr[epos] == 10) continue;
 434             ch = 10; // trigger next check
 435           }
 436           if (ch == 10) {
 437             // check for most fuckin' idiots: new line started with a dot has two dots
 438             if (epos < data.length && data.ptr[epos] == '.' &&
 439                 epos+1 < data.length && data.ptr[epos+1] == '.')
 440             {
 441               ++epos; // skip first dot
 442             }
 443             ateol = true;
 444             break;
 445           }
 446           --epos;
 447           break;
 448         }
 449         if (epos > data.length) epos = data.length; // just in case
 450         if (ateol || epos >= data.length) {
 451           data = data[epos..$];
 452           continue;
 453         }
 454       }
 455     } else {
 456       // check for most fuckin' idiots: new line started with a dot has two dots
 457       static if (multiline) {
 458         if (data.length >= 3 &&
 459             (data.ptr[0] == '\n' || data.ptr[0] == '\r') &&
 460             data.ptr[1] == '.' && data.ptr[2] == '.')
 461         {
 462           dcx ~= data.ptr[0];
 463           data = data[2..$];
 464         }
 465       }
 466     }
 467     dcx ~= data.ptr[0];
 468     data = data[1..$];
 469   }
 470   return dcx;
 471 }
 472
 473
 474 // ////////////////////////////////////////////////////////////////////////// //
 475 public T ensureProper7Bit(T:const(char)[]) (T s) nothrow @trusted {
 476   static if (is(T == typeof(null))) {
 477     return null;
 478   } else {
 479     bool needwork = false;
 480     foreach (immutable char ch; s) if (ch >= 128) { needwork = true; break; }
 481     if (!needwork) return s;
 482     char[] dcx = new char[s.length];
 483     dcx[] = s[];
 484     foreach (ref char ch; dcx) ch &= 0x7f;
 485     return cast(T)dcx; // it is safe to cast here
 486   }
 487 }
 488
 489
 490 // ////////////////////////////////////////////////////////////////////////// //
 491 // decode things like "=?UTF-8?B?Tm9yZGzDtnc=?="
 492 public T decodeSubj(T:const(char)[]) (T s) nothrow @trusted {
 493   static if (is(T == typeof(null))) {
 494     return null;
 495   } else {
 496     if (s.indexOf("=?") < 0) return s.sanitizeStrLine.utf8ToUtf8;
 497
 498     // have to do some work
 499     auto origs = s;
 500     char[] res;
 501     res.reserve(s.length); // at least
 502
 503     while (s.length > 2) {
 504       auto stqpos = s.indexOf("=?");
 505       if (stqpos < 0) break;
 506       if (stqpos > 0) res ~= s[0..stqpos].utf8ToUtf8;
 507       s = s[stqpos+2..$];
 508
 509       auto eepos = s.indexOf('?');
 510       if (eepos < 0) break;
 511       auto enc = s[0..eepos];
 512
 513       //conwriteln("ENCODING: '", enc, "'");
 514       s = s[eepos+1..$];
 515       if (enc.length == 0) enc = "utf-8";
 516       if (s.length < 2 || s.ptr[1] != '?') return origs.sanitizeStrLine.utf8ToUtf8;
 517
 518       char ect = s.ptr[0];
 519       s = s[2..$];
 520       eepos = s.indexOf("?=");
 521       if (eepos < 0) return origs.sanitizeStrLine.utf8ToUtf8;
 522
 523       auto part = s[0..eepos];
 524       s = s[eepos+2..$];
 525
 526       // several encoded parts may be separated with spaces; those spaces should be ignored
 527       stqpos = 0;
 528       while (stqpos < s.length && s.ptr[stqpos] <= ' ') ++stqpos;
 529       if (s.length-stqpos >= 2 && s.ptr[stqpos] == '=' && s.ptr[stqpos+1] == '?') s = s[stqpos..$];
 530
 531       // decode part
 532       if (ect == 'Q' || ect == 'q') {
 533         // quoted printable
 534         part = cast(T)decodeQuotedPrintable!false(part); // it is safe to cast here
 535       } else if (ect == 'B' || ect == 'b') {
 536         // base64
 537         //auto xpart = part;
 538         bool error = false;
 539         part = cast(T)decodeBase64!true(part, out error); // it is safe to cast here
 540         if (error) {
 541           //conwriteln("CANNOT DECODE B64: ", xpart);
 542           delete part;
 543           return origs.sanitizeStrLine.utf8ToUtf8;
 544         }
 545       }
 546
 547       // reencode part if necessary
 548       if (!enc.strEquCI("utf-8") && !enc.strEquCI("utf8") && !enc.strEquCI("US-ASCII")) {
 549         try {
 550           //conwriteln("RECODING: ", enc);
 551           part = recode(part, "utf-8", enc);
 552         } catch (Exception e) {
 553           //conwriteln("RECODE ERROR: ", e.msg);
 554           return origs.sanitizeStrLine.utf8ToUtf8;
 555         }
 556       }
 557
 558       part = part.sanitizeStrSubjPart.utf8ToUtf8;
 559       if (part.length) res ~= part;
 560     }
 561
 562     if (s.length) res ~= s.utf8ToUtf8;
 563     return cast(T)res.sanitizeStrLine; // it should be valid utf8 here; also, it is safe to cast here
 564   }
 565 }
 566
 567
 568 // ////////////////////////////////////////////////////////////////////////// //
 569 // decode content with the given encoding type
 570 public T decodeContent(T:const(char)[]) (T data, const(char)[] encoding) nothrow @trusted {
 571   static if (is(T == typeof(null))) {
 572     return null;
 573   } else {
 574     if (data.length == 0 || encoding.length == 0 || encoding.strEquCI("8bit") || encoding.strEquCI("binary")) {
 575       return data;
 576     }
 577
 578     if (encoding.strEquCI("7bit")) {
 579       return cast(T)ensureProper7Bit(data); // it is safe to cast here
 580     }
 581
 582     if (encoding.strEquCI("base64")) {
 583       bool error;
 584       return cast(T)decodeBase64(data, out error); // it is safe to cast here
 585     }
 586
 587     if (encoding.strEquCI("quoted-printable")) {
 588       return cast(T)decodeQuotedPrintable!true(data); // it is safe to cast here
 589     }
 590
 591     if (encoding.length != 0) {
 592       char[] res = "<invalid encoding:".dup;
 593       res ~= encoding;
 594       res ~= ">";
 595       return cast(T)res; // it is safe to cast here
 596     }
 597
 598     return data;
 599   }
 600 }
 601
 602
 603 // ////////////////////////////////////////////////////////////////////////// //
 604 public T recodeToUtf8(T:const(char)[]) (T data, const(char)[] charset) nothrow @trusted {
 605   static if (is(T == typeof(null))) {
 606     return null;
 607   } else {
 608     if (data.length == 0) return data;
 609     bool found = false;
 610     foreach (immutable char ch; data) if (ch >= 128) { found = true; break; }
 611     if (!found) return sanitizeStr(data);
 612     if (charset.length == 0 || charset.strEquCI("utf-8") || charset.strEquCI("utf8") || charset.strEquCI("US-ASCII")) {
 613       return utf8ToUtf8(data);
 614     }
 615     try {
 616       data = recode(data, "utf-8", charset);
 617       if (data.length == 0) return data;
 618       return data.sanitizeStr;
 619     } catch (Exception e) {}
 620     char[] res = "<cannot decode '".dup;
 621     foreach (char ch; charset) {
 622       if (ch <= 32 || ch >= 127) continue;
 623       res ~= ch;
 624     }
 625     res ~= "'>";
 626     return cast(T)res; // it is safe to cast here
 627   }
 628 }
 629
 630
 631 // ////////////////////////////////////////////////////////////////////////// //
 632 private T mailNameUnquote (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
 633   static if (is(T == typeof(null))) {
 634     return null;
 635   } else {
 636     buf = buf.xstrip;
 637     if (buf.length >= 2) {
 638       if ((buf.ptr[0] == '"' && buf[$-1] == '"') ||
 639           (buf.ptr[0] == '<' && buf[$-1] == '>') ||
 640           (buf.ptr[0] == '`' && buf[$-1] == '\'') ||
 641           (buf.ptr[0] == '\'' && buf[$-1] == '\''))
 642       {
 643         buf = buf[1..$-1].xstrip;
 644       }
 645     }
 646     return buf;
 647   }
 648 }
 649
 650
 651 // ////////////////////////////////////////////////////////////////////////// //
 652 // extract email from decoded "From" and "To" fields
 653 public T extractMail(bool doSanitize=true, T:const(char)[]) (T data) nothrow @trusted {
 654   static if (is(T == typeof(null))) {
 655     return null;
 656   } else {
 657     if (data.length == 0) return data;
 658     if (data[$-1] == '>') {
 659       usize pos = data.length;
 660       while (pos > 0 && data.ptr[pos-1] != '<') --pos;
 661       data = data[pos..$-1].xstrip;
 662     } else {
 663       data = data.xstrip;
 664     }
 665     static if (doSanitize) {
 666       // hack for idiotic LJ (those morons are breaking all possible standards)
 667       auto sppos = data.indexOf(' ');
 668       if (sppos > 0) data = data[0..sppos];
 669     }
 670     return data.toLowerStr;
 671   }
 672 }
 673
 674
 675 // ////////////////////////////////////////////////////////////////////////// //
 676 // strip email from decoded "From" and "To" fields
 677 public T stripMail(T:const(char)[]) (T data) nothrow @trusted {
 678   static if (is(T == typeof(null))) {
 679     return null;
 680   } else {
 681     if (data.length == 0) return data;
 682     if (data[$-1] == '>') {
 683       usize pos = data.length;
 684       while (pos > 0 && data.ptr[pos-1] != '<') --pos;
 685       if (pos == 0) return data[0..0];
 686       return data[0..pos-1].xstrip;
 687     }
 688     return data[0..0];
 689   }
 690 }
 691
 692
 693 // ////////////////////////////////////////////////////////////////////////// //
 694 // extract name from decoded "From" and "To" fields
 695 // can construct name if there is none
 696 // special hack for idiotic LJ
 697 public T extractName(T:const(char)[]) (T data) nothrow @trusted {
 698   static if (is(T == typeof(null))) {
 699     return null;
 700   } else {
 701     if (data.length == 0) return data;
 702     auto origData = data;
 703     T mail = extractMail(data);
 704     data = stripMail(data).decodeSubj.xstrip;
 705     // hack for idiotic LJ (those morons are breaking all possible standards)
 706     if (mail.startsWith("lj_dontreply@lj.rossia.org")) {
 707       auto dd = extractMail!false(origData);
 708       auto spos = dd.indexOf(" (");
 709       if (spos >= 0) {
 710         dd = dd[spos+2..$-(dd[$-1] == ')' ? 1 : 0)].xstrip;
 711         if (dd == "LJR Comment") {
 712           dd = "anonymous";
 713         } else if (dd.endsWith(" - LJR Comment")) {
 714           auto dpos = dd.lastIndexOf('-');
 715           dd = dd[0..dpos].xstrip;
 716           if (dd.length == 0) dd = "anonymous";
 717         }
 718         dd = dd.mailNameUnquote;
 719         if (dd.length) return dd;
 720       }
 721     }
 722     data = data.mailNameUnquote;
 723     if (data.length) {
 724       if (mail.startsWith("lj-notify@livejournal.com")) {
 725         if (data == "LJ Comment") {
 726           data = "anonymous";
 727         } else if (data.endsWith(" - LJ Comment")) {
 728           auto dpos = data.lastIndexOf('-');
 729           data = data[0..dpos].xstrip;
 730           if (data.length == 0) data = "anonymous";
 731         }
 732       }
 733       return data;
 734     }
 735     // construct name from the mail
 736     auto npos = mail.indexOf('@');
 737     if (npos <= 0) return mail;
 738     data = mail[0..npos].xstrip;
 739     if (data.length == 0) return mail;
 740     char[] res;
 741     res.reserve(data.length);
 742     foreach (char ch; data) {
 743       if (ch <= 32 || ch == '.' || ch == '-' || ch == '_') ch = 32;
 744       if (ch == 32) {
 745         if (res.length && res[$-1] != 32) res ~= ch;
 746       } else {
 747         if (res.length == 0 || res[$-1] == 32) ch = ch.toupper; else ch = ch.tolower;
 748         res ~= ch;
 749       }
 750     }
 751     res = res.xstrip;
 752     if (res.length == 0) return mail;
 753     return cast(T)res; // it is safe to cast here
 754   }
 755 }
 756
 757
 758 // ////////////////////////////////////////////////////////////////////////// //
 759 // encode string if it contains some non-ascii
 760 // always returns new string, which is safe to `delete`
 761 // passed string must be in UTF-8
 762 // can return `null` for empty string
 763 public dynstring strEncodeQ (const(char)[] s) nothrow @trusted {
 764   static bool isSpecial (immutable char ch) pure nothrow @safe @nogc {
 765     return
 766       ch < ' ' ||
 767       ch >= 127 ||
 768       ch == '\'' ||
 769       ch == '`' ||
 770       ch == '"' ||
 771       ch == '\\' ||
 772       ch == '@';
 773   }
 774   dynstring res;
 775   if (s.length == 0) return res;
 776   static immutable string hexd = "0123456789abcdef";
 777   bool needWork = (s[0] == '=' || s[0] == '?');
 778   if (!needWork) foreach (char ch; s) if (isSpecial(ch)) { needWork = true; break; }
 779   if (!needWork) {
 780     res = s;
 781   } else {
 782     res.reserve(s.length*3+32);
 783     res ~= "=?UTF-8?Q?"; // quoted printable
 784     foreach (char ch; s) {
 785       if (ch <= ' ') ch = '_';
 786       if (!isSpecial(ch) && ch != '=' && ch != '?') {
 787         res ~= ch;
 788       } else {
 789         res ~= "=";
 790         res ~= hexd[(cast(ubyte)ch)>>4];
 791         res ~= hexd[(cast(ubyte)ch)&0x0f];
 792       }
 793     }
 794     res ~= "?=";
 795   }
 796   return res;
 797 }