chibackend/decode.d

   1 /* E-Mail Client
   2  * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
   3  * Understanding is not required. Only obedience.
   4  *
   5  * This program is free software: you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation, version 3 of the License ONLY.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16  */
  17 module chibackend.decode is aliced;
  18
  19 import iv.cmdcon;
  20 import iv.dynstring;
  21 import iv.encoding;
  22 import iv.strex;
  23 import iv.utfutil;
  24 import iv.utfutil : utf8CodeLen, utf8Valid;
  25
  26
  27 /*
  28 // ////////////////////////////////////////////////////////////////////////// //
  29 // `ch`: utf8 start
  30 // -1: invalid utf8
  31 public byte utf8CodeLen (char ch) pure nothrow @trusted @nogc {
  32   //pragma(inline, true);
  33   if (ch < 0x80) return 1;
  34   if ((ch&0b1111_1110) == 0b1111_1100) return 6;
  35   if ((ch&0b1111_1100) == 0b1111_1000) return 5;
  36   if ((ch&0b1111_1000) == 0b1111_0000) return 4;
  37   if ((ch&0b1111_0000) == 0b1110_0000) return 3;
  38   if ((ch&0b1110_0000) == 0b1100_0000) return 2;
  39   return -1; // invalid
  40 }
  41
  42
  43 // ////////////////////////////////////////////////////////////////////////// //
  44 public bool utf8Valid (const(void)[] buf) pure nothrow @trusted @nogc {
  45   const(ubyte)* bp = cast(const(ubyte)*)buf.ptr;
  46   auto left = buf.length;
  47   while (left--) {
  48     auto len = utf8CodeLen(*bp++)-1;
  49     if (len < 0 || len > left) return false;
  50     left -= len;
  51     while (len-- > 0) if (((*bp++)&0b1100_0000) != 0b1000_0000) return false;
  52   }
  53   return true;
  54 }
  55 */
  56
  57
  58 // ////////////////////////////////////////////////////////////////////////// //
  59 public bool isValidNickUniChar (immutable dchar ch) pure nothrow @safe @nogc {
  60   pragma(inline, true);
  61   return
  62     (ch >= '0' && ch <= '9') ||
  63     (ch >= 'A' && ch <= 'Z') ||
  64     (ch >= 'a' && ch <= 'z') ||
  65     ch == '-' || ch == '_' || ch == '.' ||
  66     isValidCyrillicUni(ch);
  67 }
  68
  69
  70 public bool isValidUTFNick (const(char)[] s) pure nothrow @safe @nogc {
  71   if (s.length == 0) return false;
  72   Utf8DecoderFast dc;
  73   foreach (immutable char ch; s) {
  74     dc.decode(cast(ubyte)ch);
  75     if (dc.invalid) return false;
  76     if (dc.complete && !isValidNickUniChar(dc.codepoint)) return false;
  77   }
  78   return true;
  79 }
  80
  81
  82 // ////////////////////////////////////////////////////////////////////////// //
  83 public bool isGoodCtlChar (immutable char ch) pure nothrow @safe @nogc {
  84   pragma(inline, true);
  85   return (ch == '\t' || ch == '\n');
  86 }
  87
  88
  89 // ////////////////////////////////////////////////////////////////////////// //
  90 public bool isGoodText (const(char)[] buf) pure nothrow @trusted @nogc {
  91   foreach (immutable char ch; buf) {
  92     if (ch == 127 || (ch < 32 && !isGoodCtlChar(ch))) return false;
  93   }
  94   return true;
  95 }
  96
  97
  98 // ////////////////////////////////////////////////////////////////////////// //
  99 private bool isGoodFileNameChar (immutable char ch) pure nothrow @safe @nogc {
 100   if (ch <= 32 || ch == 127) return false;
 101   if (ch >= 128) return true;
 102   if (ch == '/' || ch == '\\') return false;
 103   return true;
 104 }
 105
 106
 107 // ////////////////////////////////////////////////////////////////////////// //
 108 // this also sanitizes it
 109 public T toLowerStr (T:const(char)[]) (T s) nothrow @trusted {
 110   static if (is(T == typeof(null))) {
 111     return null;
 112   } else {
 113     bool needwork = false;
 114     foreach (immutable char ch; s) {
 115       if (ch == 127 || (ch < 32 && !isGoodCtlChar(ch)) || (ch >= 'A' && ch <= 'Z')) {
 116         needwork = true;
 117         break;
 118       }
 119     }
 120     if (!needwork) {
 121       return s;
 122     } else {
 123       char[] res;
 124       res.reserve(s.length);
 125       foreach (immutable idx, char ch; s) {
 126              if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 127         else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 128         else if (ch == 127) res ~= '~';
 129         else if (ch >= 'A' && ch <= 'Z') res ~= ch.tolower;
 130         else res ~= ch;
 131       }
 132       return cast(T)res; // it is safe to cast here
 133     }
 134   }
 135 }
 136
 137
 138 // ////////////////////////////////////////////////////////////////////////// //
 139 // this also sanitizes it
 140 public T sanitizeFileNameStr (T:const(char)[]) (T s) nothrow @trusted {
 141   static if (is(T == typeof(null))) {
 142     return null;
 143   } else {
 144     bool needwork = false;
 145     foreach (immutable char ch; s) if (!isGoodFileNameChar(ch)) { needwork = true; break; }
 146     if (!needwork) {
 147       return s;
 148     } else {
 149       char[] res = new char[s.length];
 150       res[] = s[];
 151       foreach (ref char ch; res) {
 152         if (!isGoodFileNameChar(ch)) ch = '_';
 153       }
 154       return cast(T)res; // it is safe to cast here
 155     }
 156   }
 157 }
 158
 159
 160 // ////////////////////////////////////////////////////////////////////////// //
 161 public T sanitizeStr (T:const(char)[]) (T s) nothrow @trusted {
 162   static if (is(T == typeof(null))) {
 163     return null;
 164   } else {
 165     if (isGoodText(s)) {
 166       return s;
 167     } else {
 168       char[] res;
 169       res.reserve(s.length);
 170       foreach (immutable idx, char ch; s) {
 171              if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 172         else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 173         else if (ch == 127) res ~= '~';
 174         else res ~= ch;
 175       }
 176       return cast(T)res; // it is safe to cast here
 177     }
 178   }
 179 }
 180
 181
 182 // ////////////////////////////////////////////////////////////////////////// //
 183 public T sanitizeStrLine (T:const(char)[]) (T s) nothrow @trusted {
 184   static if (is(T == typeof(null))) {
 185     return null;
 186   } else {
 187     bool found = false;
 188     foreach (immutable idx, char ch; s) {
 189       if (ch < 32 || ch == 127) { found = true; break; }
 190       if (ch == 32 && (idx == 0 || s.ptr[idx-1] <= 32)) { found = true; break; }
 191     }
 192     if (!found) {
 193       return s;
 194     } else {
 195       char[] res;
 196       res.reserve(s.length);
 197       foreach (char ch; s) {
 198         if (ch < 32 || ch == 127) ch = ' ';
 199         if (ch <= 32 && (res.length == 0 || res[$-1] <= 32)) continue;
 200         res ~= ch;
 201       }
 202       while (res.length && res[$-1] <= 32) res = res[0..$-1];
 203       return cast(T)res; // it is safe to cast here
 204     }
 205   }
 206 }
 207
 208
 209 // ////////////////////////////////////////////////////////////////////////// //
 210 // for decoded subject parts
 211 public T sanitizeStrSubjPart (T:const(char)[]) (T s) nothrow @trusted {
 212   static if (is(T == typeof(null))) {
 213     return null;
 214   } else {
 215     bool found = false;
 216     foreach (immutable idx, immutable char ch; s) {
 217       if (ch < 32 || ch == 127 || ch == '_') { found = true; break; }
 218     }
 219     if (!found) {
 220       return s;
 221     } else {
 222       char[] res = new char[s.length];
 223       res[] = s[];
 224       foreach (ref char ch; res) if (ch < 32 || ch == 127 || ch == '_') ch = ' ';
 225       return cast(T)res; // it is safe to cast here
 226     }
 227   }
 228 }
 229
 230
 231 // ////////////////////////////////////////////////////////////////////////// //
 232 // this also sanitizes it
 233 public T binaryToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
 234   static if (is(T == typeof(null))) {
 235     return null;
 236   } else {
 237     bool found = false;
 238     foreach (immutable char ch; s) {
 239       if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { found = true; break; }
 240     }
 241     if (!found) {
 242       return s;
 243     } else {
 244       import iv.utfutil : utf8Valid;
 245       if (utf8Valid(s)) return sanitizeStr(s);
 246       char[8] uc;
 247       char[] res;
 248       // calc length
 249       usize sz = s.length;
 250       foreach (immutable char ch; s) {
 251         if (ch >= 128) {
 252           immutable int len = utf8Encode(uc[], cast(dchar)ch);
 253           assert(len > 1);
 254           sz += cast(uint)len;
 255         }
 256       }
 257       res.reserve(sz);
 258       foreach (immutable idx, char ch; s) {
 259         if (ch < 128) {
 260                if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 261           else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 262           else if (ch == 127) res ~= '~';
 263           else res ~= ch;
 264         } else {
 265           immutable int len = utf8Encode(uc[], cast(dchar)ch);
 266           assert(len > 1);
 267           res ~= uc[0..len];
 268         }
 269       }
 270       return cast(T)res; // it is safe to cast here
 271     }
 272   }
 273 }
 274
 275
 276 // ////////////////////////////////////////////////////////////////////////// //
 277 // this also sanitizes it
 278 public T utf8ToUtf8 (T:const(char)[]) (T s) nothrow @trusted {
 279   static if (is(T == typeof(null))) {
 280     return null;
 281   } else {
 282     bool found = false;
 283     foreach (immutable char ch; s) {
 284       if (ch >= 127 || (ch < 32 && !isGoodCtlChar(ch))) { found = true; break; }
 285     }
 286     if (!found) {
 287       return s;
 288     } else {
 289       import iv.utfutil : utf8Valid;
 290       if (utf8Valid(s)) return sanitizeStr(s);
 291       char[8] uc;
 292       char[] res;
 293       res.reserve(s.length);
 294       int utfleft = 0;
 295       foreach (immutable idx, char ch; s) {
 296         if (utfleft) { --utfleft; res ~= ch; continue; }
 297         if (ch < 128) {
 298                if (ch == 13) { if (idx+1 >= s.length || s.ptr[idx+1] != 10) res ~= '\n'; }
 299           else if (ch < 32 && !isGoodCtlChar(ch)) res ~= ' ';
 300           else if (ch == 127) res ~= '~';
 301           else res ~= ch;
 302         } else {
 303           immutable byte ulen = utf8CodeLen(ch);
 304           if (ulen < 1) { res ~= '?'; continue; }
 305           if (s.length-idx < ulen) { res ~= '?'; break; }
 306           if (!utf8Valid(s[idx..idx+ulen])) { res ~= '?'; continue; }
 307           res ~= ch;
 308           utfleft = ulen-1;
 309         }
 310       }
 311       return cast(T)res; // it is safe to cast here
 312     }
 313   }
 314 }
 315
 316
 317 // ////////////////////////////////////////////////////////////////////////// //
 318 public T subjRemoveRe(T:const(char)[]) (T s) nothrow @trusted {
 319   static if (is(T == typeof(null))) {
 320     return null;
 321   } else {
 322     for (;;) {
 323       s = s.xstrip;
 324       if (s.length < 3) break;
 325       if (s.ptr[0] != 'r' && s.ptr[0] != 'R') break;
 326       if (s.ptr[1] != 'e' && s.ptr[1] != 'E') break;
 327       usize pp = 2;
 328       while (pp < s.length && s.ptr[pp] <= 32) ++pp;
 329       if (pp >= s.length || s.ptr[pp] != ':') break;
 330       s = s[pp+1..$];
 331     }
 332     return s;
 333   }
 334 }
 335
 336
 337 // ////////////////////////////////////////////////////////////////////////// //
 338 private static immutable string b64alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 339
 340 private static immutable ubyte[256] b64dc = () {
 341   ubyte[256] res = 0xff; // invalid
 342   foreach (immutable idx, immutable char ch; b64alphabet) {
 343     res[cast(ubyte)ch] = cast(ubyte)idx;
 344   }
 345   res['='] = 0xfe; // padding
 346   // ignore
 347   res[0..32] = 0xf0;
 348   res[127] = 0xf0; // just in case
 349   return res;
 350 }();
 351
 352 public char[] decodeBase64(bool ignoreUnderscore=false) (const(void)[] datavoid, out bool error) nothrow @trusted {
 353   const(ubyte)[] data = cast(const(ubyte)[])datavoid;
 354
 355   bool inPadding = false;
 356   ubyte[4] bts = void;
 357   uint btspos = 0;
 358
 359   char[] dcx;
 360   dcx.reserve((data.length+3U)/4U*3U+8U);
 361   error = false;
 362
 363   bool decodeChunk () nothrow @trusted {
 364     if (btspos == 0) return true;
 365     if (btspos == 1) return false; //throw new Base64Exception("incomplete data in base64 decoder");
 366     dcx ~= cast(char)((bts.ptr[0]<<2)|((bts.ptr[1]&0x30)>>4)); // 2 and more
 367     if (btspos > 2) dcx ~= cast(char)(((bts.ptr[1]&0x0f)<<4)|((bts.ptr[2]&0x3c)>>2)); // 3 and more
 368     if (btspos > 3) dcx ~= cast(char)(((bts.ptr[2]&0x03)<<6)|bts.ptr[3]);
 369     return true;
 370   }
 371
 372   while (data.length) {
 373     immutable ubyte cb = b64dc.ptr[data.ptr[0]];
 374     if (cb == 0xff) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 375     data = data[1..$];
 376     if (cb == 0xf0) continue; // empty
 377     static if (ignoreUnderscore) {
 378       if (cb == '_') continue;
 379     }
 380     if (cb == 0xfe) {
 381       // padding
 382       if (!inPadding) {
 383         if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 384         inPadding = true;
 385       }
 386       if (++btspos == 4) { inPadding = false; btspos = 0; }
 387     } else {
 388       // normal
 389       if (inPadding) {
 390         if (btspos != 0) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 391         inPadding = false;
 392       }
 393       bts.ptr[btspos++] = cb;
 394       if (btspos == 4) {
 395         if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 396         btspos = 0;
 397       }
 398     }
 399   }
 400   if (btspos != 0 && !inPadding) {
 401     // assume that it is not padded
 402     if (!decodeChunk()) { error = true; delete dcx; return "<invalid base64 data>".dup; }
 403   }
 404
 405   return dcx;
 406 }
 407
 408
 409 // ////////////////////////////////////////////////////////////////////////// //
 410 public char[] decodeQuotedPrintable(bool multiline) (const(void)[] datavoid) nothrow @trusted {
 411   const(char)[] data = cast(const(char)[])datavoid;
 412   char[] dcx;
 413   dcx.reserve(data.length);
 414   while (data.length) {
 415     if (data[0] == '=') {
 416       if (data.length == 1) break;
 417       if (data.length >= 3 && digitInBase(data.ptr[1], 16) >= 0 && digitInBase(data.ptr[2], 16) >= 0) {
 418         dcx ~= cast(char)(digitInBase(data.ptr[1], 16)*16+digitInBase(data.ptr[2], 16));
 419         data = data[3..$];
 420         continue;
 421       }
 422       // check if it is followed by blanks up to the newline
 423       // if it is so, then this is "line continuation" -- remove both '=' and blanks
 424       static if (multiline) {
 425         bool ateol = false;
 426         usize epos = 1;
 427         while (epos < data.length) {
 428           immutable char ch = data.ptr[epos++];
 429           if (ch == 9 || ch == 32) continue;
 430           if (ch == 10) { ateol = true; break; }
 431           if (ch == 13) {
 432             if (epos >= data.length || data.ptr[epos] != 10) { ateol = true; break; }
 433             continue;
 434           }
 435           --epos;
 436           break;
 437         }
 438         if (ateol || epos >= data.length) {
 439           data = data[epos..$];
 440           continue;
 441         }
 442       }
 443     }
 444     dcx ~= data.ptr[0];
 445     data = data[1..$];
 446   }
 447   return dcx;
 448 }
 449
 450
 451 // ////////////////////////////////////////////////////////////////////////// //
 452 public T ensureProper7Bit(T:const(char)[]) (T s) nothrow @trusted {
 453   static if (is(T == typeof(null))) {
 454     return null;
 455   } else {
 456     bool needwork = false;
 457     foreach (immutable char ch; s) if (ch >= 128) { needwork = true; break; }
 458     if (!needwork) return s;
 459     char[] dcx = new char[s.length];
 460     dcx[] = s[];
 461     foreach (ref char ch; dcx) ch &= 0x7f;
 462     return cast(T)dcx; // it is safe to cast here
 463   }
 464 }
 465
 466
 467 // ////////////////////////////////////////////////////////////////////////// //
// decode things like "=?UTF-8?B?Tm9yZGzDtnc=?="
//
// The string is scanned for encoded words of the form "=?charset?X?data?=",
// where X selects the transfer encoding: 'Q'/'q' is decoded with
// `decodeQuotedPrintable!false`, 'B'/'b' with `decodeBase64!true`; any other
// letter leaves the data undecoded. Decoded parts are recoded to UTF-8 unless
// the charset is "utf-8", "utf8" or "US-ASCII" (an empty charset is treated as
// "utf-8"), then passed through `sanitizeStrSubjPart` and `utf8ToUtf8`.
// Text outside of encoded words is passed through `utf8ToUtf8`.
//
// If an encoded word is malformed (no "?X?" after the charset, no closing
// "?=", undecodable base64, recoding failure), the whole ORIGINAL string is
// returned, only sanitized with `sanitizeStrLine` and `utf8ToUtf8`.
public T decodeSubj(T:const(char)[]) (T s) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    // fast path: no encoded words at all
    if (s.indexOf("=?") < 0) return s.sanitizeStrLine.utf8ToUtf8;

    // have to do some work
    auto origs = s; // kept for the "give up" exits
    char[] res;
    res.reserve(s.length); // at least

    while (s.length > 2) {
      auto stqpos = s.indexOf("=?");
      if (stqpos < 0) break;
      // copy the plain text before the encoded word
      if (stqpos > 0) res ~= s[0..stqpos].utf8ToUtf8;
      s = s[stqpos+2..$];

      // charset name runs up to the next '?'
      auto eepos = s.indexOf('?');
      // NOTE(review): on this exit the "=?" consumed above is not put back into the result
      if (eepos < 0) break;
      auto enc = s[0..eepos];

      //conwriteln("ENCODING: '", enc, "'");
      s = s[eepos+1..$];
      if (enc.length == 0) enc = "utf-8";
      // exactly one encoding-type char followed by '?' is expected here
      if (s.length < 2 || s.ptr[1] != '?') return origs.sanitizeStrLine.utf8ToUtf8;

      char ect = s.ptr[0];
      s = s[2..$];
      eepos = s.indexOf("?=");
      if (eepos < 0) return origs.sanitizeStrLine.utf8ToUtf8;

      auto part = s[0..eepos];
      s = s[eepos+2..$];

      // several encoded parts may be separated with spaces; those spaces should be ignored
      // (blanks are dropped only when another "=?" immediately follows them)
      stqpos = 0;
      while (stqpos < s.length && s.ptr[stqpos] <= ' ') ++stqpos;
      if (s.length-stqpos >= 2 && s.ptr[stqpos] == '=' && s.ptr[stqpos+1] == '?') s = s[stqpos..$];

      // decode part
      if (ect == 'Q' || ect == 'q') {
        // quoted printable
        part = cast(T)decodeQuotedPrintable!false(part); // it is safe to cast here
      } else if (ect == 'B' || ect == 'b') {
        // base64
        //auto xpart = part;
        bool error = false;
        part = cast(T)decodeBase64!true(part, out error); // it is safe to cast here
        if (error) {
          //conwriteln("CANNOT DECODE B64: ", xpart);
          // `part` is the error text allocated by the decoder at this point
          delete part;
          return origs.sanitizeStrLine.utf8ToUtf8;
        }
      }

      // reencode part if necessary
      if (!enc.strEquCI("utf-8") && !enc.strEquCI("utf8") && !enc.strEquCI("US-ASCII")) {
        try {
          //conwriteln("RECODING: ", enc);
          part = recode(part, "utf-8", enc);
        } catch (Exception e) {
          //conwriteln("RECODE ERROR: ", e.msg);
          return origs.sanitizeStrLine.utf8ToUtf8;
        }
      }

      // underscores and control chars become spaces here, for both 'Q' and 'B' parts
      part = part.sanitizeStrSubjPart.utf8ToUtf8;
      if (part.length) res ~= part;
    }

    // whatever is left after the last encoded word
    if (s.length) res ~= s.utf8ToUtf8;
    return cast(T)res.sanitizeStrLine; // it should be valid utf8 here; also, it is safe to cast here
  }
}
 543
 544
 545 // ////////////////////////////////////////////////////////////////////////// //
 546 // decode content with the given encoding type
 547 public T decodeContent(T:const(char)[]) (T data, const(char)[] encoding) nothrow @trusted {
 548   static if (is(T == typeof(null))) {
 549     return null;
 550   } else {
 551     if (data.length == 0 || encoding.length == 0 || encoding.strEquCI("8bit") || encoding.strEquCI("binary")) {
 552       return data;
 553     }
 554
 555     if (encoding.strEquCI("7bit")) {
 556       return cast(T)ensureProper7Bit(data); // it is safe to cast here
 557     }
 558
 559     if (encoding.strEquCI("base64")) {
 560       bool error;
 561       return cast(T)decodeBase64(data, out error); // it is safe to cast here
 562     }
 563
 564     if (encoding.strEquCI("quoted-printable")) {
 565       return cast(T)decodeQuotedPrintable!true(data); // it is safe to cast here
 566     }
 567
 568     if (encoding.length != 0) {
 569       char[] res = "<invalid encoding:".dup;
 570       res ~= encoding;
 571       res ~= ">";
 572       return cast(T)res; // it is safe to cast here
 573     }
 574
 575     return data;
 576   }
 577 }
 578
 579
 580 // ////////////////////////////////////////////////////////////////////////// //
 581 public T recodeToUtf8(T:const(char)[]) (T data, const(char)[] charset) nothrow @trusted {
 582   static if (is(T == typeof(null))) {
 583     return null;
 584   } else {
 585     if (data.length == 0) return data;
 586     bool found = false;
 587     foreach (immutable char ch; data) if (ch >= 128) { found = true; break; }
 588     if (!found) return sanitizeStr(data);
 589     if (charset.length == 0 || charset.strEquCI("utf-8") || charset.strEquCI("utf8") || charset.strEquCI("US-ASCII")) {
 590       return utf8ToUtf8(data);
 591     }
 592     try {
 593       data = recode(data, "utf-8", charset);
 594       if (data.length == 0) return data;
 595       return data.sanitizeStr;
 596     } catch (Exception e) {}
 597     char[] res = "<cannot decode '".dup;
 598     foreach (char ch; charset) {
 599       if (ch <= 32 || ch >= 127) continue;
 600       res ~= ch;
 601     }
 602     res ~= "'>";
 603     return cast(T)res; // it is safe to cast here
 604   }
 605 }
 606
 607
 608 // ////////////////////////////////////////////////////////////////////////// //
 609 private T mailNameUnquote (T:const(char)[]) (T buf) pure nothrow @trusted @nogc {
 610   static if (is(T == typeof(null))) {
 611     return null;
 612   } else {
 613     buf = buf.xstrip;
 614     if (buf.length >= 2) {
 615       if ((buf.ptr[0] == '"' && buf[$-1] == '"') ||
 616           (buf.ptr[0] == '<' && buf[$-1] == '>') ||
 617           (buf.ptr[0] == '`' && buf[$-1] == '\'') ||
 618           (buf.ptr[0] == '\'' && buf[$-1] == '\''))
 619       {
 620         buf = buf[1..$-1].xstrip;
 621       }
 622     }
 623     return buf;
 624   }
 625 }
 626
 627
 628 // ////////////////////////////////////////////////////////////////////////// //
 629 // extract email from decoded "From" and "To" fields
 630 public T extractMail(bool doSanitize=true, T:const(char)[]) (T data) nothrow @trusted {
 631   static if (is(T == typeof(null))) {
 632     return null;
 633   } else {
 634     if (data.length == 0) return data;
 635     if (data[$-1] == '>') {
 636       usize pos = data.length;
 637       while (pos > 0 && data.ptr[pos-1] != '<') --pos;
 638       data = data[pos..$-1].xstrip;
 639     } else {
 640       data = data.xstrip;
 641     }
 642     static if (doSanitize) {
 643       // hack for idiotic LJ (those morons are breaking all possible standards)
 644       auto sppos = data.indexOf(' ');
 645       if (sppos > 0) data = data[0..sppos];
 646     }
 647     return data.toLowerStr;
 648   }
 649 }
 650
 651
 652 // ////////////////////////////////////////////////////////////////////////// //
 653 // strip email from decoded "From" and "To" fields
 654 public T stripMail(T:const(char)[]) (T data) nothrow @trusted {
 655   static if (is(T == typeof(null))) {
 656     return null;
 657   } else {
 658     if (data.length == 0) return data;
 659     if (data[$-1] == '>') {
 660       usize pos = data.length;
 661       while (pos > 0 && data.ptr[pos-1] != '<') --pos;
 662       if (pos == 0) return data[0..0];
 663       return data[0..pos-1].xstrip;
 664     }
 665     return data[0..0];
 666   }
 667 }
 668
 669
 670 // ////////////////////////////////////////////////////////////////////////// //
// extract name from decoded "From" and "To" fields
// can construct name if there is none
// special hack for idiotic LJ
//
// Order of attempts:
//   1. for mail starting with "lj_dontreply@lj.rossia.org": the name is taken
//      from the "(...)" part of the unsanitized address;
//   2. the text before "<address>", decoded with `decodeSubj` and unquoted
//      (with "LJ Comment" suffix handling for "lj-notify@livejournal.com");
//   3. a name built from the local part of the address: '.', '-', '_' and
//      blanks separate words, and each word is capitalized.
// If nothing works, the address itself is returned.
public T extractName(T:const(char)[]) (T data) nothrow @trusted {
  static if (is(T == typeof(null))) {
    return null;
  } else {
    if (data.length == 0) return data;
    auto origData = data;
    T mail = extractMail(data);
    data = stripMail(data).decodeSubj.xstrip;
    // hack for idiotic LJ (those morons are breaking all possible standards)
    if (mail.startsWith("lj_dontreply@lj.rossia.org")) {
      // unsanitized address keeps the part after the first space
      auto dd = extractMail!false(origData);
      auto spos = dd.indexOf(" (");
      if (spos >= 0) {
        // take the text after " (", dropping the closing ')' if it is there
        dd = dd[spos+2..$-(dd[$-1] == ')' ? 1 : 0)].xstrip;
        // `extractMail` lowercases its result, so this mixed-case literal cannot match
        if (dd == "LJR Comment") {
          dd = "anonymous";
        // this mixed-case suffix cannot match either, for the same reason
        } else if (dd.endsWith(" - LJR Comment")) {
          auto dpos = dd.lastIndexOf('-');
          dd = dd[0..dpos].xstrip;
          if (dd.length == 0) dd = "anonymous";
        }
        dd = dd.mailNameUnquote;
        if (dd.length) return dd;
      }
    }
    data = data.mailNameUnquote;
    if (data.length) {
      if (mail.startsWith("lj-notify@livejournal.com")) {
        if (data == "LJ Comment") {
          data = "anonymous";
        } else if (data.endsWith(" - LJ Comment")) {
          // cut the suffix at its dash (the last dash in the string)
          auto dpos = data.lastIndexOf('-');
          data = data[0..dpos].xstrip;
          if (data.length == 0) data = "anonymous";
        }
      }
      return data;
    }
    // construct name from the mail
    auto npos = mail.indexOf('@');
    if (npos <= 0) return mail; // no '@', or empty local part
    data = mail[0..npos].xstrip;
    if (data.length == 0) return mail;
    char[] res;
    res.reserve(data.length);
    foreach (char ch; data) {
      // word separators
      if (ch <= 32 || ch == '.' || ch == '-' || ch == '_') ch = 32;
      if (ch == 32) {
        // no leading spaces, and no double spaces
        if (res.length && res[$-1] != 32) res ~= ch;
      } else {
        // first char of a word is uppercased, the rest is lowercased
        if (res.length == 0 || res[$-1] == 32) ch = ch.toupper; else ch = ch.tolower;
        res ~= ch;
      }
    }
    res = res.xstrip;
    if (res.length == 0) return mail;
    return cast(T)res; // it is safe to cast here
  }
}
 733
 734
 735 // ////////////////////////////////////////////////////////////////////////// //
 736 // encode string if it contains some non-ascii
 737 // always returns new string, which is safe to `delete`
 738 // passed string must be in UTF-8
 739 // can return `null` for empty string
 740 public dynstring strEncodeQ (const(char)[] s) nothrow @trusted {
 741   static bool isSpecial (immutable char ch) pure nothrow @safe @nogc {
 742     return
 743       ch < ' ' ||
 744       ch >= 127 ||
 745       ch == '\'' ||
 746       ch == '`' ||
 747       ch == '"' ||
 748       ch == '\\' ||
 749       ch == '@';
 750   }
 751   dynstring res;
 752   if (s.length == 0) return res;
 753   static immutable string hexd = "0123456789abcdef";
 754   bool needWork = (s[0] == '=' || s[0] == '?');
 755   if (!needWork) foreach (char ch; s) if (isSpecial(ch)) { needWork = true; break; }
 756   if (!needWork) {
 757     res = s;
 758   } else {
 759     res.reserve(s.length*3+32);
 760     res ~= "=?UTF-8?Q?"; // quoted printable
 761     foreach (char ch; s) {
 762       if (ch <= ' ') ch = '_';
 763       if (!isSpecial(ch) && ch != '=' && ch != '?') {
 764         res ~= ch;
 765       } else {
 766         res ~= "=";
 767         res ~= hexd[(cast(ubyte)ch)>>4];
 768         res ~= hexd[(cast(ubyte)ch)&0x0f];
 769       }
 770     }
 771     res ~= "?=";
 772   }
 773   return res;
 774 }