sqbase_experiment/chiroptera/decode.d

   1 /* E-Mail Client
   2  * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
   3  * Understanding is not required. Only obedience.
   4  *
   5  * This program is free software: you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation, version 3 of the License ONLY.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16  */
  17 module chiroptera.decode is aliced;
  18
  19 //import iv.base64;
  20 import iv.encoding;
  21 import iv.strex;
  22 import iv.utfutil;
  23 import iv.utfutil : utf8CodeLen, utf8Valid;
  24
  25
  26 /*
  27 // ////////////////////////////////////////////////////////////////////////// //
  28 // `ch`: utf8 start
  29 // -1: invalid utf8
  30 public byte utf8CodeLen (char ch) pure nothrow @trusted @nogc {
  31   //pragma(inline, true);
  32   if (ch < 0x80) return 1;
  33   if ((ch&0b1111_1110) == 0b1111_1100) return 6;
  34   if ((ch&0b1111_1100) == 0b1111_1000) return 5;
  35   if ((ch&0b1111_1000) == 0b1111_0000) return 4;
  36   if ((ch&0b1111_0000) == 0b1110_0000) return 3;
  37   if ((ch&0b1110_0000) == 0b1100_0000) return 2;
  38   return -1; // invalid
  39 }
  40
  41
  42 // ////////////////////////////////////////////////////////////////////////// //
  43 public bool utf8Valid (const(void)[] buf) pure nothrow @trusted @nogc {
  44   const(ubyte)* bp = cast(const(ubyte)*)buf.ptr;
  45   auto left = buf.length;
  46   while (left--) {
  47     auto len = utf8CodeLen(*bp++)-1;
  48     if (len < 0 || len > left) return false;
  49     left -= len;
  50     while (len-- > 0) if (((*bp++)&0b1100_0000) != 0b1000_0000) return false;
  51   }
  52   return true;
  53 }
  54 */
  55
  56
  57 // ////////////////////////////////////////////////////////////////////////// //
  58 public bool isValidNickUniChar (immutable dchar ch) pure nothrow @safe @nogc {
  59   pragma(inline, true);
  60   return
  61     (ch >= '0' && ch <= '9') ||
  62     (ch >= 'A' && ch <= 'Z') ||
  63     (ch >= 'a' && ch <= 'z') ||
  64     ch == '-' || ch == '_' || ch == '.' ||
  65     isValidCyrillicUni(ch);
  66 }
  67
  68
  69 public bool isValidUTFNick (const(char)[] s) pure nothrow @safe @nogc {
  70   if (s.length == 0) return false;
  71   Utf8DecoderFast dc;
  72   foreach (immutable char ch; s) {
  73     dc.decode(cast(ubyte)ch);
  74     if (dc.invalid) return false;
  75     if (dc.complete && !isValidNickUniChar(dc.codepoint)) return false;
  76   }
  77   return true;
  78 }
  79
  80
  81 // ////////////////////////////////////////////////////////////////////////// //
  82 public bool isGoodCtlChar (immutable char ch) pure nothrow @safe @nogc {
  83   pragma(inline, true);
  84   return (ch == '\t' || ch == '\n');
  85 }
  86
  87
  88 // ////////////////////////////////////////////////////////////////////////// //
  89 public bool isGoodText (const(char)[] buf) pure nothrow @trusted @nogc {
  90   foreach (immutable char ch; buf) {
  91     if (ch == 127 || (ch < 32 && !isGoodCtlChar(ch))) return false;
  92   }
  93   return true;
  94 }
  95
  96
  97 // ////////////////////////////////////////////////////////////////////////// //
  98 private bool isGoodFileNameChar (immutable char ch) pure nothrow @safe @nogc {
  99   if (ch <= 32 || ch == 127) return false;
 100   if (ch >= 128) return true;
 101   if (ch == '/' || ch == '\\') return false;
 102   return true;
 103 }
 104
 105
 106 // ////////////////////////////////////////////////////////////////////////// //
 107 // this also sanitizes it
 108 public T toLowerStr (T:const(char)[]) (T s) nothrow @trusted {
 109   static if (is(T == typeof(null))) {
 110     return null;
 111   } else {
 112     bool needwork = false;
 113     foreach (immutable char ch; s) {
 114       if (ch == 127 || (ch < 32 && !isGoodCtlChar(ch)) || (ch >= 'A' && ch <= 'Z')) {
 115         needwork = true;
 116         break;
 117       }
 118     }
 119     if (!needwork) {
 120       return s;
 121     } else {
 122       char[] res;
 123       res.reserve(s.length);
 124       foreach (immutable idx, char ch; s) {
 125              if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 126         else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 127         else if (ch == 127) res ~= '~';
 128         else if (ch >= 'A' && ch <= 'Z') res ~= ch.tolower;
 129         else res ~= ch;
 130       }
 131       return cast(T)res; // it is safe to cast here
 132     }
 133   }
 134 }
 135
 136
 137 // ////////////////////////////////////////////////////////////////////////// //
 138 // this also sanitizes it
 139 public T sanitizeFileNameStr (T:const(char)[]) (T s) nothrow @trusted {
 140   static if (is(T == typeof(null))) {
 141     return null;
 142   } else {
 143     bool needwork = false;
 144     foreach (immutable char ch; s) if (!isGoodFileNameChar(ch)) { needwork = true; break; }
 145     if (!needwork) {
 146       return s;
 147     } else {
 148       char[] res = new char[s.length];
 149       res[] = s[];
 150       foreach (ref char ch; res) {
 151         if (!isGoodFileNameChar(ch)) ch = '_';
 152       }
 153       return cast(T)res; // it is safe to cast here
 154     }
 155   }
 156 }
 157
 158
 159 // ////////////////////////////////////////////////////////////////////////// //
 160 public T sanitizeStr (T:const(char)[]) (T s) nothrow @trusted {
 161   static if (is(T == typeof(null))) {
 162     return null;
 163   } else {
 164     if (isGoodText(s)) {
 165       return s;
 166     } else {
 167       char[] res;
 168       res.reserve(s.length);
 169       foreach (immutable idx, char ch; s) {
 170              if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 171         else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 172         else if (ch == 127) res ~= '~';
 173         else res ~= ch;
 174       }
 175       return cast(T)res; // it is safe to cast here
 176     }
 177   }
 178 }
 179
 180
 181 // ////////////////////////////////////////////////////////////////////////// //
 182 public T sanitizeStrLine (T:const(char)[]) (T s) nothrow @trusted {
 183   static if (is(T == typeof(null))) {
 184     return null;
 185   } else {
 186     bool found = false;
 187     foreach (immutable idx, char ch; s) {
 188       if (ch < 32 || ch == 127) { found = true; break; }
 189       if (ch == 32 && (idx == 0 || s.ptr[idx-1] <= 32)) { found = true; break; }
 190     }
 191     if (!found) {
 192       return s;
 193     } else {
 194       char[] res;
 195       res.reserve(s.length);
 196       foreach (char ch; s) {
 197         if (ch < 32 || ch == 127) ch = ' ';
 198         if (ch <= 32 && (res.length == 0 || res[$-1] <= 32)) continue;
 199         res ~= ch;
 200       }
 201       while (res.length && res[$-1] <= 32) res = res[0..$-1];
 202       return cast(T)res; // it is safe to cast here
 203     }
 204   }
 205 }
 206
 207
 208 // ////////////////////////////////////////////////////////////////////////// //
 209 // for decoded subject parts
 210 public T sanitizeStrSubjPart (T:const(char)[]) (T s) nothrow @trusted {
 211   static if (is(T == typeof(null))) {
 212     return null;
 213   } else {
 214     bool found = false;
 215     foreach (immutable idx, immutable char ch; s) {
 216       if (ch < 32 || ch == 127 || ch == '_') { found = true; break; }
 217     }
 218     if (!found) {
 219       return s;
 220     } else {
 221       char[] res = new char[s.length];
 222       res[] = s[];
 223       foreach (ref char ch; res) if (ch < 32 || ch == 127 || ch == '_') ch = ' ';
 224       return cast(T)res; // it is safe to cast here
 225     }
 226   }
 227 }
 228
 229
 230 // ////////////////////////////////////////////////////////////////////////// //
 231 // this also sanitizes it
 232 public T binaryToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
 233   static if (is(T == typeof(null))) {
 234     return null;
 235   } else {
 236     bool found = false;
 237     foreach (immutable char ch; s) {
 238       if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { found = true; break; }
 239     }
 240     if (!found) {
 241       return s;
 242     } else {
 243       import iv.utfutil : utf8Valid;
 244       if (utf8Valid(s)) return sanitizeStr(s);
 245       char[8] uc;
 246       char[] res;
 247       // calc length
 248       usize sz = s.length;
 249       foreach (immutable char ch; s) {
 250         if (ch >= 128) {
 251           immutable int len = utf8Encode(uc[], cast(dchar)ch);
 252           assert(len > 1);
 253           sz += cast(uint)len;
 254         }
 255       }
 256       res.reserve(sz);
 257       foreach (immutable idx, char ch; s) {
 258         if (ch < 128) {
 259                if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 260           else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 261           else if (ch == 127) res ~= '~';
 262           else res ~= ch;
 263         } else {
 264           immutable int len = utf8Encode(uc[], cast(dchar)ch);
 265           assert(len > 1);
 266           res ~= uc[0..len];
 267         }
 268       }
 269       return cast(T)res; // it is safe to cast here
 270     }
 271   }
 272 }
 273
 274
 275 // ////////////////////////////////////////////////////////////////////////// //
 276 // this also sanitizes it
 277 public T utf8ToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
 278   static if (is(T == typeof(null))) {
 279     return null;
 280   } else {
 281     bool found = false;
 282     foreach (immutable char ch; s) {
 283       if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { found = true; break; }
 284     }
 285     if (!found) {
 286       return s;
 287     } else {
 288       import iv.utfutil : utf8Valid;
 289       if (utf8Valid(s)) return sanitizeStr(s);
 290       char[8] uc;
 291       char[] res;
 292       res.reserve(s.length);
 293       int utfleft = 0;
 294       foreach (immutable idx, char ch; s) {
 295         if (utfleft) { --utfleft; res ~= ch; continue; }
 296         if (ch < 128) {
 297                if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 298           else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 299           else if (ch == 127) res ~= '~';
 300           else res ~= ch;
 301         } else {
 302           immutable byte ulen = utf8CodeLen(ch);
 303           if (ulen < 1) { res ~= '?'; continue; }
 304           if (s.length-idx < ulen) { res ~= '?'; break; }
 305           if (!utf8Valid(s[idx..idx+ulen])) { res ~= '?'; continue; }
 306           res ~= ch;
 307           utfleft = ulen-1;
 308         }
 309       }
 310       return cast(T)res; // it is safe to cast here
 311     }
 312   }
 313 }
 314
 315
 316 // ////////////////////////////////////////////////////////////////////////// //
 317 public T subjRemoveRe(T:const(char)[]) (T s) nothrow @trusted {
 318   static if (is(T == typeof(null))) {
 319     return null;
 320   } else {
 321     for (;;) {
 322       s = s.xstrip;
 323       if (s.length < 3) break;
 324       if (s.ptr[0] != 'r' && s.ptr[0] != 'R') break;
 325       if (s.ptr[1] != 'e' && s.ptr[1] != 'E') break;
 326       usize pp = 2;
 327       while (pp < s.length && s.ptr[pp] <= 32) ++pp;
 328       if (pp >= s.length || s.ptr[pp] != ':') break;
 329       s = s[pp+1..$];
 330     }
 331     return s;
 332   }
 333 }
 334
 335
 336 // ////////////////////////////////////////////////////////////////////////// //
 337 private static immutable string b64alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 338
 339 private static immutable ubyte[256] b64dc = () {
 340   ubyte[256] res = 0xff; // invalid
 341   foreach (immutable idx, immutable char ch; b64alphabet) {
 342     res[cast(ubyte)ch] = cast(ubyte)idx;
 343   }
 344   res['='] = 0xfe; // padding
 345   // ignore
 346   res[0..32] = 0xf0;
 347   res[127] = 0xf0; // just in case
 348   return res;
 349 }();
 350
 351 public char[] decodeBase64 (const(void)[] datavoid, out bool error) nothrow @trusted {
 352   const(ubyte)[] data = cast(const(ubyte)[])datavoid;
 353
 354   bool inPadding = false;
 355   ubyte[4] bts = void;
 356   uint btspos = 0;
 357
 358   char[] dcx;
 359   dcx.reserve((data.length+3U)/4U*3U+8U);
 360   error = false;
 361
 362   bool decodeChunk () nothrow @trusted {
 363     if (btspos == 0) return true;
 364     if (btspos == 1) return false; //throw new Base64Exception("incomplete data in base64 decoder");
 365     dcx ~= cast(char)((bts.ptr[0]<<2)|((bts.ptr[1]&0x30)>>4)); // 2 and more
 366     if (btspos > 2) dcx ~= cast(char)(((bts.ptr[1]&0x0f)<<4)|((bts.ptr[2]&0x3c)>>2)); // 3 and more
 367     if (btspos > 3) dcx ~= cast(char)(((bts.ptr[2]&0x03)<<6)|bts.ptr[3]);
 368     return true;
 369   }
 370
 371   while (data.length) {
 372     immutable ubyte cb = b64dc.ptr[data.ptr[0]];
 373     if (cb == 0xff) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 374     data = data[1..$];
 375     if (cb == 0xf0) continue; // empty
 376     if (cb == 0xfe) {
 377       // padding
 378       if (!inPadding) {
 379         if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 380         inPadding = true;
 381       }
 382       if (++btspos == 4) { inPadding = false; btspos = 0; }
 383     } else {
 384       // normal
 385       if (inPadding) {
 386         if (btspos != 0) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 387         inPadding = false;
 388       }
 389       bts.ptr[btspos++] = cb;
 390       if (btspos == 4) {
 391         if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 392         btspos = 0;
 393       }
 394     }
 395   }
 396   if (btspos != 0 && !inPadding) {
 397     // assume that it is not padded
 398     if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 399   }
 400
 401   return dcx;
 402 }
 403
 404
 405 // ////////////////////////////////////////////////////////////////////////// //
 406 public char[] decodeQuotedPrintable(bool multiline) (const(void)[] datavoid) nothrow @trusted {
 407   const(char)[] data = cast(const(char)[])datavoid;
 408   char[] dcx;
 409   dcx.reserve(data.length);
 410   while (data.length) {
 411     if (data[0] == '=') {
 412       if (data.length == 1) break;
 413       if (data.length >= 3 && digitInBase(data.ptr[1], 16) >= 0 && digitInBase(data.ptr[2], 16) >= 0) {
 414         dcx ~= cast(char)(digitInBase(data.ptr[1], 16)*16+digitInBase(data.ptr[2], 16));
 415         data = data[3..$];
 416         continue;
 417       }
 418       // check if it is followed by blanks up to the newline
 419       // if it is so, then this is "line continuation" -- remove both '=' and blanks
 420       static if (multiline) {
 421         bool ateol = false;
 422         usize epos = 1;
 423         while (epos < data.length) {
 424           immutable char ch = data.ptr[epos++];
 425           if (ch == 9 || ch == 32) continue;
 426           if (ch == 10) { ateol = true; break; }
 427           if (ch == 13) {
 428             if (epos >= data.length || data.ptr[epos] != 10) { ateol = true; break; }
 429             continue;
 430           }
 431           --epos;
 432           break;
 433         }
 434         if (ateol || epos >= data.length) {
 435           data = data[epos..$];
 436           continue;
 437         }
 438       }
 439     }
 440     dcx ~= data.ptr[0];
 441     data = data[1..$];
 442   }
 443   return dcx;
 444 }
 445
 446
 447 // ////////////////////////////////////////////////////////////////////////// //
 448 public T ensureProper7Bit(T:const(char)[]) (T s) nothrow @trusted {
 449   static if (is(T == typeof(null))) {
 450     return null;
 451   } else {
 452     bool needwork = false;
 453     foreach (immutable char ch; s) if (ch >= 128) { needwork = true; break; }
 454     if (!needwork) return s;
 455     char[] dcx = new char[s.length];
 456     dcx[] = s[];
 457     foreach (ref char ch; dcx) ch &= 0x7f;
 458     return cast(T)dcx; // it is safe to cast here
 459   }
 460 }
 461
 462
 463 // ////////////////////////////////////////////////////////////////////////// //
 464 // decode things like "=?UTF-8?B?Tm9yZGzDtnc=?="
 465 public T decodeSubj(T:const(char)[]) (T s) nothrow @trusted {
 466   static if (is(T == typeof(null))) {
 467     return null;
 468   } else {
 469     if (s.indexOf("=?") < 0) return s.sanitizeStrLine.utf8ToUtf8;
 470
 471     // have to do some work
 472     auto origs = s;
 473     char[] res;
 474     res.reserve(s.length); // at least
 475
 476     while (s.length > 2) {
 477       auto stqpos = s.indexOf("=?");
 478       if (stqpos < 0) break;
 479       if (stqpos > 0) res ~= s[0..stqpos].utf8ToUtf8;
 480       s = s[stqpos+2..$];
 481
 482       auto eepos = s.indexOf('?');
 483       if (eepos < 0) break;
 484       auto enc = s[0..eepos];
 485
 486       //conwriteln("ENCODING: '", enc, "'");
 487       s = s[eepos+1..$];
 488       if (enc.length == 0) enc = "utf-8";
 489       if (s.length < 2 || s.ptr[1] != '?') return origs.sanitizeStrLine.utf8ToUtf8;
 490
 491       char ect = s.ptr[0];
 492       s = s[2..$];
 493       eepos = s.indexOf("?=");
 494       if (eepos < 0) return origs.sanitizeStrLine.utf8ToUtf8;
 495
 496       auto part = s[0..eepos];
 497       s = s[eepos+2..$];
 498
 499       // several encoded parts may be separated with spaces; those spaces should be ignored
 500       stqpos = 0;
 501       while (stqpos < s.length && s.ptr[stqpos] <= ' ') ++stqpos;
 502       if (s.length-stqpos >= 2 && s.ptr[stqpos] == '=' && s.ptr[stqpos+1] == '?') s = s[stqpos..$];
 503
 504       // decode part
 505       if (ect == 'Q' || ect == 'q') {
 506         // quoted printable
 507         part = cast(T)decodeQuotedPrintable!false(part); // it is safe to cast here
 508       } else if (ect == 'B' || ect == 'b') {
 509         // base64
 510         bool error = false;
 511         part = cast(T)decodeBase64(part, out error); // it is safe to cast here
 512         if (error) { delete part; return origs.sanitizeStrLine.utf8ToUtf8; }
 513       }
 514
 515       // reencode part if necessary
 516       if (!enc.strEquCI("utf-8") && !enc.strEquCI("utf8") && !enc.strEquCI("US-ASCII")) {
 517         try {
 518           part = recode(part, "utf-8", enc);
 519         } catch (Exception e) {
 520           return origs.sanitizeStrLine.utf8ToUtf8;
 521         }
 522       }
 523
 524       part = part.sanitizeStrSubjPart.utf8ToUtf8;
 525       if (part.length) res ~= part;
 526     }
 527
 528     if (s.length) res ~= s.utf8ToUtf8;
 529     return cast(T)res.sanitizeStrLine; // it should be valid utf8 here; also, it is safe to cast here
 530   }
 531 }
 532
 533
 534 // ////////////////////////////////////////////////////////////////////////// //
 535 // decode content with the given encoding type
 536 public T decodeContent(T:const(char)[]) (T data, const(char)[] encoding) nothrow @trusted {
 537   static if (is(T == typeof(null))) {
 538     return null;
 539   } else {
 540     if (data.length == 0 || encoding.length == 0 || encoding.strEquCI("8bit") || encoding.strEquCI("binary")) {
 541       return data;
 542     }
 543
 544     if (encoding.strEquCI("7bit")) {
 545       return cast(T)ensureProper7Bit(data); // it is safe to cast here
 546     }
 547
 548     if (encoding.strEquCI("base64")) {
 549       bool error;
 550       return cast(T)decodeBase64(data, out error); // it is safe to cast here
 551     }
 552
 553     if (encoding.strEquCI("quoted-printable")) {
 554       return cast(T)decodeQuotedPrintable!true(data); // it is safe to cast here
 555     }
 556
 557     if (encoding.length != 0) {
 558       char[] res = "<invalid encoding:".dup;
 559       res ~= encoding;
 560       res ~= ">";
 561       return cast(T)res; // it is safe to cast here
 562     }
 563
 564     return data;
 565   }
 566 }
 567
 568
 569 // ////////////////////////////////////////////////////////////////////////// //
 570 public T recodeToUtf8(T:const(char)[]) (T data, const(char)[] charset) nothrow @trusted {
 571   static if (is(T == typeof(null))) {
 572     return null;
 573   } else {
 574     if (data.length == 0) return data;
 575     bool found = false;
 576     foreach (immutable char ch; data) if (ch >= 128) { found = true; break; }
 577     if (!found) return sanitizeStr(data);
 578     if (charset.length == 0 || charset.strEquCI("utf-8") || charset.strEquCI("utf8") || charset.strEquCI("US-ASCII")) {
 579       return utf8ToUtf8(data);
 580     }
 581     try {
 582       data = recode(data, "utf-8", charset);
 583       if (data.length == 0) return data;
 584       return data.sanitizeStr;
 585     } catch (Exception e) {}
 586     char[] res = "<cannot decode '".dup;
 587     foreach (char ch; charset) {
 588       if (ch <= 32 || ch >= 127) continue;
 589       res ~= ch;
 590     }
 591     res ~= "'>";
 592     return cast(T)res; // it is safe to cast here
 593   }
 594 }
 595
 596
 597 // ////////////////////////////////////////////////////////////////////////// //
 598 private T mailNameUnquote (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
 599   static if (is(T == typeof(null))) {
 600     return null;
 601   } else {
 602     buf = buf.xstrip;
 603     if (buf.length >= 2) {
 604       if ((buf.ptr[0] == '"' && buf[$-1] == '"') ||
 605           (buf.ptr[0] == '<' && buf[$-1] == '>') ||
 606           (buf.ptr[0] == '`' && buf[$-1] == '\'') ||
 607           (buf.ptr[0] == '\'' && buf[$-1] == '\''))
 608       {
 609         buf = buf[1..$-1].xstrip;
 610       }
 611     }
 612     return buf;
 613   }
 614 }
 615
 616
 617 // ////////////////////////////////////////////////////////////////////////// //
 618 // extract email from decoded "From" and "To" fields
 619 public T extractMail(bool doSanitize=true, T:const(char)[]) (T data) nothrow @trusted {
 620   static if (is(T == typeof(null))) {
 621     return null;
 622   } else {
 623     if (data.length == 0) return data;
 624     if (data[$-1] == '>') {
 625       usize pos = data.length;
 626       while (pos > 0 && data.ptr[pos-1] != '<') --pos;
 627       data = data[pos..$-1].xstrip;
 628     } else {
 629       data = data.xstrip;
 630     }
 631     static if (doSanitize) {
 632       // hack for idiotic LJ (those morons are breaking all possible standards)
 633       auto sppos = data.indexOf(' ');
 634       if (sppos > 0) data = data[0..sppos];
 635     }
 636     return data.toLowerStr;
 637   }
 638 }
 639
 640
 641 // ////////////////////////////////////////////////////////////////////////// //
 642 // strip email from decoded "From" and "To" fields
 643 public T stripMail(T:const(char)[]) (T data) nothrow @trusted {
 644   static if (is(T == typeof(null))) {
 645     return null;
 646   } else {
 647     if (data.length == 0) return data;
 648     if (data[$-1] == '>') {
 649       usize pos = data.length;
 650       while (pos > 0 && data.ptr[pos-1] != '<') --pos;
 651       if (pos == 0) return data[0..0];
 652       return data[0..pos-1].xstrip;
 653     }
 654     return data[0..0];
 655   }
 656 }
 657
 658
 659 // ////////////////////////////////////////////////////////////////////////// //
 660 // extract name from decoded "From" and "To" fields
 661 // can construct name if there is none
 662 // special hack for idiotic LJ
 663 public T extractName(T:const(char)[]) (T data) nothrow @trusted {
 664   static if (is(T == typeof(null))) {
 665     return null;
 666   } else {
 667     if (data.length == 0) return data;
 668     auto origData = data;
 669     T mail = extractMail(data);
 670     data = stripMail(data);
 671     // hack for idiotic LJ (those morons are breaking all possible standards)
 672     if (mail.startsWith("lj_dontreply@lj.rossia.org")) {
 673       auto dd = extractMail!false(origData);
 674       auto spos = dd.indexOf(" (");
 675       if (spos >= 0) {
 676         dd = dd[spos+2..$-(dd[$-1] == ')' ? 1 : 0)].xstrip;
 677         if (dd == "LJR Comment") {
 678           dd = "anonymous";
 679         } else if (dd.endsWith(" - LJR Comment")) {
 680           auto dpos = dd.lastIndexOf('-');
 681           dd = dd[0..dpos].xstrip;
 682           if (dd.length == 0) dd = "anonymous";
 683         }
 684         dd = dd.mailNameUnquote;
 685         if (dd.length) return dd;
 686       }
 687     }
 688     data = data.mailNameUnquote;
 689     if (data.length) {
 690       if (mail.startsWith("lj-notify@livejournal.com")) {
 691         if (data == "LJ Comment") {
 692           data = "anonymous";
 693         } else if (data.endsWith(" - LJ Comment")) {
 694           auto dpos = data.lastIndexOf('-');
 695           data = data[0..dpos].xstrip;
 696           if (data.length == 0) data = "anonymous";
 697         }
 698       }
 699       return data;
 700     }
 701     // construct name from the mail
 702     auto npos = mail.indexOf('@');
 703     if (npos <= 0) return mail;
 704     data = mail[0..npos].xstrip;
 705     if (data.length == 0) return mail;
 706     char[] res;
 707     res.reserve(data.length);
 708     foreach (char ch; data) {
 709       if (ch <= 32 || ch == '.' || ch == '-' || ch == '_') ch = 32;
 710       if (ch == 32) {
 711         if (res.length && res[$-1] != 32) res ~= ch;
 712       } else {
 713         if (res.length == 0 || res[$-1] == 32) ch = ch.toupper; else ch = ch.tolower;
 714         res ~= ch;
 715       }
 716     }
 717     res = res.xstrip;
 718     if (res.length == 0) return mail;
 719     return cast(T)res; // it is safe to cast here
 720   }
 721 }
 722
 723
 724 // ////////////////////////////////////////////////////////////////////////// //
 725 // encode string if it contains some non-ascii
 726 // always returns new string, which is safe to `delete`
 727 // passed string must be in UTF-8
 728 // can return `null` for empty string
 729 public char[] strEncodeQ (const(char)[] s) nothrow @trusted {
 730   static bool isSpecial (immutable char ch) pure nothrow @safe @nogc {
 731     return
 732       ch < ' ' ||
 733       ch >= 127 ||
 734       ch == '\'' ||
 735       ch == '`' ||
 736       ch == '"' ||
 737       ch == '\\' ||
 738       ch == '@';
 739   }
 740   if (s.length == 0) return null;
 741   static immutable string hexd = "0123456789abcdef";
 742   bool needWork = (s[0] == '=' || s[0] == '?');
 743   if (!needWork) foreach (char ch; s) if (isSpecial(ch)) { needWork = true; break; }
 744   char[] res;
 745   if (!needWork) {
 746     res = new char[s.length];
 747     res[] = s[];
 748   } else {
 749     res.reserve(s.length*3+32);
 750     res ~= "=?UTF-8?Q?"; // quoted printable
 751     foreach (char ch; s) {
 752       if (ch <= ' ') ch = '_';
 753       if (!isSpecial(ch) && ch != '=' && ch != '?') {
 754         res ~= ch;
 755       } else {
 756         res ~= "=";
 757         res ~= hexd[(cast(ubyte)ch)>>4];
 758         res ~= hexd[(cast(ubyte)ch)&0x0f];
 759       }
 760     }
 761     res ~= "?=";
 762   }
 763   return res;
 764 }