bump product version to 5.0.4.1
[LibreOffice.git] / svl / source / misc / adrparse.cxx
blob52d22ac36d4481a2d45028fbccf7d19908f455fa
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <tools/inetmime.hxx>
21 #include <rtl/ustrbuf.hxx>
22 #include <svl/adrparse.hxx>
24 namespace
27 enum ElementType { ELEMENT_START, ELEMENT_DELIM, ELEMENT_ITEM, ELEMENT_END };
29 struct ParsedAddrSpec
31 sal_Unicode const * m_pBegin;
32 sal_Unicode const * m_pEnd;
33 ElementType m_eLastElem;
34 bool m_bAtFound;
35 bool m_bReparse;
37 ParsedAddrSpec() { reset(); }
39 bool isPoorlyValid() const { return m_eLastElem >= ELEMENT_ITEM; }
41 bool isValid() const { return isPoorlyValid() && m_bAtFound; }
43 inline void reset();
45 inline void finish();
48 inline void ParsedAddrSpec::reset()
50 m_pBegin = 0;
51 m_pEnd = 0;
52 m_eLastElem = ELEMENT_START;
53 m_bAtFound = false;
54 m_bReparse = false;
57 inline void ParsedAddrSpec::finish()
59 if (isPoorlyValid())
60 m_eLastElem = ELEMENT_END;
61 else
62 reset();
67 class SvAddressParser_Impl
69 enum State { BEFORE_COLON, BEFORE_LESS, AFTER_LESS, AFTER_GREATER };
71 enum TokenType { TOKEN_QUOTED = 0x80000000, TOKEN_DOMAIN, TOKEN_COMMENT,
72 TOKEN_ATOM };
74 sal_Unicode const * m_pInputPos;
75 sal_Unicode const * m_pInputEnd;
76 sal_uInt32 m_nCurToken;
77 sal_Unicode const * m_pCurTokenBegin;
78 sal_Unicode const * m_pCurTokenEnd;
79 sal_Unicode const * m_pCurTokenContentBegin;
80 sal_Unicode const * m_pCurTokenContentEnd;
81 bool m_bCurTokenReparse;
82 ParsedAddrSpec m_aOuterAddrSpec;
83 ParsedAddrSpec m_aInnerAddrSpec;
84 ParsedAddrSpec * m_pAddrSpec;
85 sal_Unicode const * m_pRealNameBegin;
86 sal_Unicode const * m_pRealNameEnd;
87 sal_Unicode const * m_pRealNameContentBegin;
88 sal_Unicode const * m_pRealNameContentEnd;
89 bool m_bRealNameReparse;
90 bool m_bRealNameFinished;
91 sal_Unicode const * m_pFirstCommentBegin;
92 sal_Unicode const * m_pFirstCommentEnd;
93 bool m_bFirstCommentReparse;
94 State m_eState;
95 TokenType m_eType;
97 inline void resetRealNameAndFirstComment();
99 inline void reset();
101 inline void addTokenToAddrSpec(ElementType eTokenElem);
103 inline void addTokenToRealName();
105 bool readToken();
107 static OUString reparse(sal_Unicode const * pBegin,
108 sal_Unicode const * pEnd, bool bAddrSpec);
110 static OUString reparseComment(sal_Unicode const * pBegin,
111 sal_Unicode const * pEnd);
113 public:
114 SvAddressParser_Impl(SvAddressParser * pParser, const OUString& rIn);
117 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
119 m_pRealNameBegin = 0;
120 m_pRealNameEnd = 0;
121 m_pRealNameContentBegin = 0;
122 m_pRealNameContentEnd = 0;
123 m_bRealNameReparse = false;
124 m_bRealNameFinished = false;
125 m_pFirstCommentBegin = 0;
126 m_pFirstCommentEnd = 0;
127 m_bFirstCommentReparse = false;
130 inline void SvAddressParser_Impl::reset()
132 m_aOuterAddrSpec.reset();
133 m_aInnerAddrSpec.reset();
134 m_pAddrSpec = &m_aOuterAddrSpec;
135 resetRealNameAndFirstComment();
136 m_eState = BEFORE_COLON;
137 m_eType = TOKEN_ATOM;
140 inline void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem)
142 if (!m_pAddrSpec->m_pBegin)
143 m_pAddrSpec->m_pBegin = m_pCurTokenBegin;
144 else if (m_pAddrSpec->m_pEnd < m_pCurTokenBegin)
145 m_pAddrSpec->m_bReparse = true;
146 m_pAddrSpec->m_pEnd = m_pCurTokenEnd;
147 m_pAddrSpec->m_eLastElem = eTokenElem;
150 inline void SvAddressParser_Impl::addTokenToRealName()
152 if (!m_bRealNameFinished && m_eState != AFTER_LESS)
154 if (!m_pRealNameBegin)
155 m_pRealNameBegin = m_pRealNameContentBegin = m_pCurTokenBegin;
156 else if (m_pRealNameEnd < m_pCurTokenBegin - 1
157 || (m_pRealNameEnd == m_pCurTokenBegin - 1
158 && *m_pRealNameEnd != ' '))
159 m_bRealNameReparse = true;
160 m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenEnd;
165 // SvAddressParser_Impl
168 bool SvAddressParser_Impl::readToken()
170 m_nCurToken = m_eType;
171 m_bCurTokenReparse = false;
172 switch (m_eType)
174 case TOKEN_QUOTED:
176 m_pCurTokenBegin = m_pInputPos - 1;
177 m_pCurTokenContentBegin = m_pInputPos;
178 bool bEscaped = false;
179 for (;;)
181 if (m_pInputPos >= m_pInputEnd)
182 return false;
183 sal_Unicode cChar = *m_pInputPos++;
184 if (bEscaped)
186 m_bCurTokenReparse = true;
187 bEscaped = false;
189 else if (cChar == '"')
191 m_pCurTokenEnd = m_pInputPos;
192 m_pCurTokenContentEnd = m_pInputPos - 1;
193 return true;
195 else if (cChar == '\\')
196 bEscaped = true;
200 case TOKEN_DOMAIN:
202 m_pCurTokenBegin = m_pInputPos - 1;
203 m_pCurTokenContentBegin = m_pInputPos;
204 bool bEscaped = false;
205 for (;;)
207 if (m_pInputPos >= m_pInputEnd)
208 return false;
209 sal_Unicode cChar = *m_pInputPos++;
210 if (bEscaped)
211 bEscaped = false;
212 else if (cChar == ']')
214 m_pCurTokenEnd = m_pInputPos;
215 return true;
217 else if (cChar == '\\')
218 bEscaped = true;
222 case TOKEN_COMMENT:
224 m_pCurTokenBegin = m_pInputPos - 1;
225 m_pCurTokenContentBegin = 0;
226 m_pCurTokenContentEnd = 0;
227 bool bEscaped = false;
228 int nLevel = 0;
229 for (;;)
231 if (m_pInputPos >= m_pInputEnd)
232 return false;
233 sal_Unicode cChar = *m_pInputPos++;
234 if (bEscaped)
236 m_bCurTokenReparse = true;
237 m_pCurTokenContentEnd = m_pInputPos;
238 bEscaped = false;
240 else if (cChar == '(')
242 if (!m_pCurTokenContentBegin)
243 m_pCurTokenContentBegin = m_pInputPos - 1;
244 m_pCurTokenContentEnd = m_pInputPos;
245 ++nLevel;
247 else if (cChar == ')')
248 if (nLevel)
250 m_pCurTokenContentEnd = m_pInputPos;
251 --nLevel;
253 else
254 return true;
255 else if (cChar == '\\')
257 if (!m_pCurTokenContentBegin)
258 m_pCurTokenContentBegin = m_pInputPos - 1;
259 bEscaped = true;
261 else if (cChar > ' ' && cChar != 0x7F) // DEL
263 if (!m_pCurTokenContentBegin)
264 m_pCurTokenContentBegin = m_pInputPos - 1;
265 m_pCurTokenContentEnd = m_pInputPos;
270 default:
272 sal_Unicode cChar;
273 for (;;)
275 if (m_pInputPos >= m_pInputEnd)
276 return false;
277 cChar = *m_pInputPos++;
278 if (cChar > ' ' && cChar != 0x7F) // DEL
279 break;
281 m_pCurTokenBegin = m_pInputPos - 1;
282 if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
283 || cChar == '.' || cChar == ':' || cChar == ';'
284 || cChar == '<' || cChar == '>' || cChar == '@'
285 || cChar == '[' || cChar == '\\' || cChar == ']')
287 m_nCurToken = cChar;
288 m_pCurTokenEnd = m_pInputPos;
289 return true;
291 else
292 for (;;)
294 if (m_pInputPos >= m_pInputEnd)
296 m_pCurTokenEnd = m_pInputPos;
297 return true;
299 cChar = *m_pInputPos++;
300 if (cChar <= ' ' || cChar == '"' || cChar == '('
301 || cChar == ')' || cChar == ',' || cChar == '.'
302 || cChar == ':' || cChar == ';' || cChar == '<'
303 || cChar == '>' || cChar == '@' || cChar == '['
304 || cChar == '\\' || cChar == ']'
305 || cChar == 0x7F) // DEL
307 m_pCurTokenEnd = --m_pInputPos;
308 return true;
315 // static
316 OUString SvAddressParser_Impl::reparse(sal_Unicode const * pBegin,
317 sal_Unicode const * pEnd, bool bAddrSpec)
319 OUStringBuffer aResult;
320 TokenType eMode = TOKEN_ATOM;
321 bool bEscaped = false;
322 bool bEndsWithSpace = false;
323 int nLevel = 0;
324 while (pBegin < pEnd)
326 sal_Unicode cChar = *pBegin++;
327 switch (eMode)
329 case TOKEN_QUOTED:
330 if (bEscaped)
332 aResult.append(cChar);
333 bEscaped = false;
335 else if (cChar == '"')
337 if (bAddrSpec)
338 aResult.append(cChar);
339 eMode = TOKEN_ATOM;
341 else if (cChar == '\\')
343 if (bAddrSpec)
344 aResult.append(cChar);
345 bEscaped = true;
347 else
348 aResult.append(cChar);
349 break;
351 case TOKEN_DOMAIN:
352 if (bEscaped)
354 aResult.append(cChar);
355 bEscaped = false;
357 else if (cChar == ']')
359 aResult.append(cChar);
360 eMode = TOKEN_ATOM;
362 else if (cChar == '\\')
364 if (bAddrSpec)
365 aResult.append(cChar);
366 bEscaped = true;
368 else
369 aResult.append(cChar);
370 break;
372 case TOKEN_COMMENT:
373 if (bEscaped)
374 bEscaped = false;
375 else if (cChar == '(')
376 ++nLevel;
377 else if (cChar == ')')
378 if (nLevel)
379 --nLevel;
380 else
381 eMode = TOKEN_ATOM;
382 else if (cChar == '\\')
383 bEscaped = true;
384 break;
386 case TOKEN_ATOM:
387 if (cChar <= ' ' || cChar == 0x7F) // DEL
389 if (!bAddrSpec && !bEndsWithSpace)
391 aResult.append(' ');
392 bEndsWithSpace = true;
395 else if (cChar == '(')
397 if (!bAddrSpec && !bEndsWithSpace)
399 aResult.append(' ');
400 bEndsWithSpace = true;
402 eMode = TOKEN_COMMENT;
404 else
406 bEndsWithSpace = false;
407 if (cChar == '"')
409 if (bAddrSpec)
410 aResult.append(cChar);
411 eMode = TOKEN_QUOTED;
413 else if (cChar == '[')
415 aResult.append(cChar);
416 eMode = TOKEN_QUOTED;
418 else
419 aResult.append(cChar);
421 break;
424 return aResult.makeStringAndClear();
427 // static
428 OUString SvAddressParser_Impl::reparseComment(sal_Unicode const * pBegin,
429 sal_Unicode const * pEnd)
431 OUStringBuffer aResult;
432 while (pBegin < pEnd)
434 sal_Unicode cChar = *pBegin++;
435 if (cChar == '\\')
436 cChar = *pBegin++;
437 aResult.append(cChar);
439 return aResult.makeStringAndClear();
442 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser * pParser,
443 const OUString& rInput)
444 : m_pCurTokenBegin(NULL)
445 , m_pCurTokenEnd(NULL)
446 , m_pCurTokenContentBegin(NULL)
447 , m_pCurTokenContentEnd(NULL)
449 m_pInputPos = rInput.getStr();
450 m_pInputEnd = m_pInputPos + rInput.getLength();
452 reset();
453 bool bDone = false;
454 for (;;)
456 if (!readToken())
458 m_bRealNameFinished = true;
459 if (m_eState == AFTER_LESS)
460 m_nCurToken = '>';
461 else
463 m_nCurToken = ',';
464 bDone = true;
467 switch (m_nCurToken)
469 case TOKEN_QUOTED:
470 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
472 if (m_pAddrSpec->m_bAtFound
473 || m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
474 m_pAddrSpec->reset();
475 addTokenToAddrSpec(ELEMENT_ITEM);
477 if (!m_bRealNameFinished && m_eState != AFTER_LESS)
479 if (m_bCurTokenReparse)
481 if (!m_pRealNameBegin)
482 m_pRealNameBegin = m_pCurTokenBegin;
483 m_pRealNameEnd = m_pCurTokenEnd;
484 m_bRealNameReparse = true;
486 else if (m_bRealNameReparse)
487 m_pRealNameEnd = m_pCurTokenEnd;
488 else if (!m_pRealNameBegin)
490 m_pRealNameBegin = m_pCurTokenBegin;
491 m_pRealNameContentBegin = m_pCurTokenContentBegin;
492 m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenContentEnd;
494 else
496 m_pRealNameEnd = m_pCurTokenEnd;
497 m_bRealNameReparse = true;
500 m_eType = TOKEN_ATOM;
501 break;
503 case TOKEN_DOMAIN:
504 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
506 if (m_pAddrSpec->m_bAtFound && m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
507 addTokenToAddrSpec(ELEMENT_ITEM);
508 else
509 m_pAddrSpec->reset();
511 addTokenToRealName();
512 m_eType = TOKEN_ATOM;
513 break;
515 case TOKEN_COMMENT:
516 if (!m_bRealNameFinished && m_eState != AFTER_LESS
517 && !m_pFirstCommentBegin && m_pCurTokenContentBegin)
519 m_pFirstCommentBegin = m_pCurTokenContentBegin;
520 m_pFirstCommentEnd = m_pCurTokenContentEnd;
521 m_bFirstCommentReparse = m_bCurTokenReparse;
523 m_eType = TOKEN_ATOM;
524 break;
526 case TOKEN_ATOM:
527 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
529 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
530 m_pAddrSpec->reset();
531 addTokenToAddrSpec(ELEMENT_ITEM);
533 addTokenToRealName();
534 break;
536 case '(':
537 m_eType = TOKEN_COMMENT;
538 break;
540 case ')':
541 case '\\':
542 case ']':
543 m_pAddrSpec->finish();
544 addTokenToRealName();
545 break;
547 case '<':
548 switch (m_eState)
550 case BEFORE_COLON:
551 case BEFORE_LESS:
552 m_aOuterAddrSpec.finish();
553 if (m_pRealNameBegin)
554 m_bRealNameFinished = true;
555 m_pAddrSpec = &m_aInnerAddrSpec;
556 m_eState = AFTER_LESS;
557 break;
559 case AFTER_LESS:
560 m_aInnerAddrSpec.finish();
561 break;
563 case AFTER_GREATER:
564 m_aOuterAddrSpec.finish();
565 addTokenToRealName();
566 break;
568 break;
570 case '>':
571 if (m_eState == AFTER_LESS)
573 m_aInnerAddrSpec.finish();
574 if (m_aInnerAddrSpec.isValid())
575 m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
576 m_pAddrSpec = &m_aOuterAddrSpec;
577 m_eState = AFTER_GREATER;
579 else
581 m_aOuterAddrSpec.finish();
582 addTokenToRealName();
584 break;
586 case '@':
587 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
589 if (!m_pAddrSpec->m_bAtFound
590 && m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
592 addTokenToAddrSpec(ELEMENT_DELIM);
593 m_pAddrSpec->m_bAtFound = true;
595 else
596 m_pAddrSpec->reset();
598 addTokenToRealName();
599 break;
601 case ',':
602 case ';':
603 if (m_eState == AFTER_LESS)
604 if (m_nCurToken == ',')
606 if (m_aInnerAddrSpec.m_eLastElem != ELEMENT_END)
607 m_aInnerAddrSpec.reset();
609 else
610 m_aInnerAddrSpec.finish();
611 else
613 if(m_aInnerAddrSpec.isValid() || (!m_aOuterAddrSpec.isValid() && m_aInnerAddrSpec.isPoorlyValid()))
615 m_pAddrSpec = &m_aInnerAddrSpec;
617 else if(m_aOuterAddrSpec.isPoorlyValid())
619 m_pAddrSpec = &m_aOuterAddrSpec;
621 else
623 m_pAddrSpec = 0;
626 if (m_pAddrSpec)
628 OUString aTheAddrSpec;
629 if (m_pAddrSpec->m_bReparse)
630 aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin, m_pAddrSpec->m_pEnd, true);
631 else
633 sal_Int32 nLen = ( m_pAddrSpec->m_pEnd - m_pAddrSpec->m_pBegin);
634 if (nLen == rInput.getLength())
635 aTheAddrSpec = rInput;
636 else
637 aTheAddrSpec = rInput.copy( (m_pAddrSpec->m_pBegin - rInput.getStr()),
638 nLen);
640 OUString aTheRealName;
641 if (!m_pRealNameBegin ||
642 (m_pAddrSpec == &m_aOuterAddrSpec &&
643 m_pRealNameBegin == m_aOuterAddrSpec.m_pBegin &&
644 m_pRealNameEnd == m_aOuterAddrSpec.m_pEnd &&
645 m_pFirstCommentBegin))
647 if (!m_pFirstCommentBegin)
648 aTheRealName = aTheAddrSpec;
649 else if (m_bFirstCommentReparse)
650 aTheRealName = reparseComment(m_pFirstCommentBegin,
651 m_pFirstCommentEnd);
652 else
653 aTheRealName = rInput.copy( (m_pFirstCommentBegin - rInput.getStr()),
654 (m_pFirstCommentEnd - m_pFirstCommentBegin));
656 else if (m_bRealNameReparse)
657 aTheRealName = reparse(m_pRealNameBegin, m_pRealNameEnd, false);
658 else
660 sal_Int32 nLen = (m_pRealNameContentEnd - m_pRealNameContentBegin);
661 if (nLen == rInput.getLength())
662 aTheRealName = rInput;
663 else
664 aTheRealName = rInput.copy( (m_pRealNameContentBegin - rInput.getStr()), nLen);
666 if (pParser->m_bHasFirst)
667 pParser->m_aRest.push_back(new SvAddressEntry_Impl( aTheAddrSpec,
668 aTheRealName) );
669 else
671 pParser->m_bHasFirst = true;
672 pParser->m_aFirst.m_aAddrSpec = aTheAddrSpec;
673 pParser->m_aFirst.m_aRealName = aTheRealName;
676 if (bDone)
677 return;
678 reset();
680 break;
682 case ':':
683 switch (m_eState)
685 case BEFORE_COLON:
686 m_aOuterAddrSpec.reset();
687 resetRealNameAndFirstComment();
688 m_eState = BEFORE_LESS;
689 break;
691 case BEFORE_LESS:
692 case AFTER_GREATER:
693 m_aOuterAddrSpec.finish();
694 addTokenToRealName();
695 break;
697 case AFTER_LESS:
698 m_aInnerAddrSpec.reset();
699 break;
701 break;
703 case '"':
704 m_eType = TOKEN_QUOTED;
705 break;
707 case '.':
708 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
710 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
711 addTokenToAddrSpec(ELEMENT_DELIM);
712 else
713 m_pAddrSpec->reset();
715 addTokenToRealName();
716 break;
718 case '[':
719 m_eType = TOKEN_DOMAIN;
720 break;
725 SvAddressParser::SvAddressParser(const OUString& rInput)
726 : m_bHasFirst(false)
728 SvAddressParser_Impl aDoParse(this, rInput);
731 SvAddressParser::~SvAddressParser()
733 for ( size_t i = m_aRest.size(); i > 0; )
734 delete m_aRest[ --i ];
735 m_aRest.clear();
738 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */