bump product version to 4.1.6.2
[LibreOffice.git] / svl / source / misc / adrparse.cxx
blobb8914eaca0532a3133702285eeaadb7692cac17c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <tools/inetmime.hxx>
21 #include <rtl/ustrbuf.hxx>
22 #include <svl/adrparse.hxx>
24 namespace
27 enum ElementType { ELEMENT_START, ELEMENT_DELIM, ELEMENT_ITEM, ELEMENT_END };
29 //============================================================================
30 struct ParsedAddrSpec
32 sal_Unicode const * m_pBegin;
33 sal_Unicode const * m_pEnd;
34 ElementType m_eLastElem;
35 bool m_bAtFound;
36 bool m_bReparse;
38 ParsedAddrSpec() { reset(); }
40 bool isPoorlyValid() const { return m_eLastElem >= ELEMENT_ITEM; }
42 bool isValid() const { return isPoorlyValid() && m_bAtFound; }
44 inline void reset();
46 inline void finish();
49 inline void ParsedAddrSpec::reset()
51 m_pBegin = 0;
52 m_pEnd = 0;
53 m_eLastElem = ELEMENT_START;
54 m_bAtFound = false;
55 m_bReparse = false;
58 inline void ParsedAddrSpec::finish()
60 if (isPoorlyValid())
61 m_eLastElem = ELEMENT_END;
62 else
63 reset();
68 //============================================================================
69 class SvAddressParser_Impl
71 enum State { BEFORE_COLON, BEFORE_LESS, AFTER_LESS, AFTER_GREATER };
73 enum TokenType { TOKEN_QUOTED = 0x80000000, TOKEN_DOMAIN, TOKEN_COMMENT,
74 TOKEN_ATOM };
76 sal_Unicode const * m_pInputPos;
77 sal_Unicode const * m_pInputEnd;
78 sal_uInt32 m_nCurToken;
79 sal_Unicode const * m_pCurTokenBegin;
80 sal_Unicode const * m_pCurTokenEnd;
81 sal_Unicode const * m_pCurTokenContentBegin;
82 sal_Unicode const * m_pCurTokenContentEnd;
83 bool m_bCurTokenReparse;
84 ParsedAddrSpec m_aOuterAddrSpec;
85 ParsedAddrSpec m_aInnerAddrSpec;
86 ParsedAddrSpec * m_pAddrSpec;
87 sal_Unicode const * m_pRealNameBegin;
88 sal_Unicode const * m_pRealNameEnd;
89 sal_Unicode const * m_pRealNameContentBegin;
90 sal_Unicode const * m_pRealNameContentEnd;
91 bool m_bRealNameReparse;
92 bool m_bRealNameFinished;
93 sal_Unicode const * m_pFirstCommentBegin;
94 sal_Unicode const * m_pFirstCommentEnd;
95 bool m_bFirstCommentReparse;
96 State m_eState;
97 TokenType m_eType;
99 inline void resetRealNameAndFirstComment();
101 inline void reset();
103 inline void addTokenToAddrSpec(ElementType eTokenElem);
105 inline void addTokenToRealName();
107 bool readToken();
109 static OUString reparse(sal_Unicode const * pBegin,
110 sal_Unicode const * pEnd, bool bAddrSpec);
112 static OUString reparseComment(sal_Unicode const * pBegin,
113 sal_Unicode const * pEnd);
115 public:
116 SvAddressParser_Impl(SvAddressParser * pParser, const OUString& rIn);
119 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
121 m_pRealNameBegin = 0;
122 m_pRealNameEnd = 0;
123 m_pRealNameContentBegin = 0;
124 m_pRealNameContentEnd = 0;
125 m_bRealNameReparse = false;
126 m_bRealNameFinished = false;
127 m_pFirstCommentBegin = 0;
128 m_pFirstCommentEnd = 0;
129 m_bFirstCommentReparse = false;
132 inline void SvAddressParser_Impl::reset()
134 m_aOuterAddrSpec.reset();
135 m_aInnerAddrSpec.reset();
136 m_pAddrSpec = &m_aOuterAddrSpec;
137 resetRealNameAndFirstComment();
138 m_eState = BEFORE_COLON;
139 m_eType = TOKEN_ATOM;
142 inline void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem)
144 if (!m_pAddrSpec->m_pBegin)
145 m_pAddrSpec->m_pBegin = m_pCurTokenBegin;
146 else if (m_pAddrSpec->m_pEnd < m_pCurTokenBegin)
147 m_pAddrSpec->m_bReparse = true;
148 m_pAddrSpec->m_pEnd = m_pCurTokenEnd;
149 m_pAddrSpec->m_eLastElem = eTokenElem;
152 inline void SvAddressParser_Impl::addTokenToRealName()
154 if (!m_bRealNameFinished && m_eState != AFTER_LESS)
156 if (!m_pRealNameBegin)
157 m_pRealNameBegin = m_pRealNameContentBegin = m_pCurTokenBegin;
158 else if (m_pRealNameEnd < m_pCurTokenBegin - 1
159 || (m_pRealNameEnd == m_pCurTokenBegin - 1
160 && *m_pRealNameEnd != ' '))
161 m_bRealNameReparse = true;
162 m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenEnd;
166 //============================================================================
168 // SvAddressParser_Impl
170 //============================================================================
172 bool SvAddressParser_Impl::readToken()
174 m_nCurToken = m_eType;
175 m_bCurTokenReparse = false;
176 switch (m_eType)
178 case TOKEN_QUOTED:
180 m_pCurTokenBegin = m_pInputPos - 1;
181 m_pCurTokenContentBegin = m_pInputPos;
182 bool bEscaped = false;
183 for (;;)
185 if (m_pInputPos >= m_pInputEnd)
186 return false;
187 sal_Unicode cChar = *m_pInputPos++;
188 if (bEscaped)
190 m_bCurTokenReparse = true;
191 bEscaped = false;
193 else if (cChar == '"')
195 m_pCurTokenEnd = m_pInputPos;
196 m_pCurTokenContentEnd = m_pInputPos - 1;
197 return true;
199 else if (cChar == '\\')
200 bEscaped = true;
204 case TOKEN_DOMAIN:
206 m_pCurTokenBegin = m_pInputPos - 1;
207 m_pCurTokenContentBegin = m_pInputPos;
208 bool bEscaped = false;
209 for (;;)
211 if (m_pInputPos >= m_pInputEnd)
212 return false;
213 sal_Unicode cChar = *m_pInputPos++;
214 if (bEscaped)
215 bEscaped = false;
216 else if (cChar == ']')
218 m_pCurTokenEnd = m_pInputPos;
219 return true;
221 else if (cChar == '\\')
222 bEscaped = true;
226 case TOKEN_COMMENT:
228 m_pCurTokenBegin = m_pInputPos - 1;
229 m_pCurTokenContentBegin = 0;
230 m_pCurTokenContentEnd = 0;
231 bool bEscaped = false;
232 int nLevel = 0;
233 for (;;)
235 if (m_pInputPos >= m_pInputEnd)
236 return false;
237 sal_Unicode cChar = *m_pInputPos++;
238 if (bEscaped)
240 m_bCurTokenReparse = true;
241 m_pCurTokenContentEnd = m_pInputPos;
242 bEscaped = false;
244 else if (cChar == '(')
246 if (!m_pCurTokenContentBegin)
247 m_pCurTokenContentBegin = m_pInputPos - 1;
248 m_pCurTokenContentEnd = m_pInputPos;
249 ++nLevel;
251 else if (cChar == ')')
252 if (nLevel)
254 m_pCurTokenContentEnd = m_pInputPos;
255 --nLevel;
257 else
258 return true;
259 else if (cChar == '\\')
261 if (!m_pCurTokenContentBegin)
262 m_pCurTokenContentBegin = m_pInputPos - 1;
263 bEscaped = true;
265 else if (cChar > ' ' && cChar != 0x7F) // DEL
267 if (!m_pCurTokenContentBegin)
268 m_pCurTokenContentBegin = m_pInputPos - 1;
269 m_pCurTokenContentEnd = m_pInputPos;
274 default:
276 sal_Unicode cChar;
277 for (;;)
279 if (m_pInputPos >= m_pInputEnd)
280 return false;
281 cChar = *m_pInputPos++;
282 if (cChar > ' ' && cChar != 0x7F) // DEL
283 break;
285 m_pCurTokenBegin = m_pInputPos - 1;
286 if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
287 || cChar == '.' || cChar == ':' || cChar == ';'
288 || cChar == '<' || cChar == '>' || cChar == '@'
289 || cChar == '[' || cChar == '\\' || cChar == ']')
291 m_nCurToken = cChar;
292 m_pCurTokenEnd = m_pInputPos;
293 return true;
295 else
296 for (;;)
298 if (m_pInputPos >= m_pInputEnd)
300 m_pCurTokenEnd = m_pInputPos;
301 return true;
303 cChar = *m_pInputPos++;
304 if (cChar <= ' ' || cChar == '"' || cChar == '('
305 || cChar == ')' || cChar == ',' || cChar == '.'
306 || cChar == ':' || cChar == ';' || cChar == '<'
307 || cChar == '>' || cChar == '@' || cChar == '['
308 || cChar == '\\' || cChar == ']'
309 || cChar == 0x7F) // DEL
311 m_pCurTokenEnd = --m_pInputPos;
312 return true;
319 //============================================================================
320 // static
321 OUString SvAddressParser_Impl::reparse(sal_Unicode const * pBegin,
322 sal_Unicode const * pEnd, bool bAddrSpec)
324 OUStringBuffer aResult;
325 TokenType eMode = TOKEN_ATOM;
326 bool bEscaped = false;
327 bool bEndsWithSpace = false;
328 int nLevel = 0;
329 while (pBegin < pEnd)
331 sal_Unicode cChar = *pBegin++;
332 switch (eMode)
334 case TOKEN_QUOTED:
335 if (bEscaped)
337 aResult.append(cChar);
338 bEscaped = false;
340 else if (cChar == '"')
342 if (bAddrSpec)
343 aResult.append(cChar);
344 eMode = TOKEN_ATOM;
346 else if (cChar == '\\')
348 if (bAddrSpec)
349 aResult.append(cChar);
350 bEscaped = true;
352 else
353 aResult.append(cChar);
354 break;
356 case TOKEN_DOMAIN:
357 if (bEscaped)
359 aResult.append(cChar);
360 bEscaped = false;
362 else if (cChar == ']')
364 aResult.append(cChar);
365 eMode = TOKEN_ATOM;
367 else if (cChar == '\\')
369 if (bAddrSpec)
370 aResult.append(cChar);
371 bEscaped = true;
373 else
374 aResult.append(cChar);
375 break;
377 case TOKEN_COMMENT:
378 if (bEscaped)
379 bEscaped = false;
380 else if (cChar == '(')
381 ++nLevel;
382 else if (cChar == ')')
383 if (nLevel)
384 --nLevel;
385 else
386 eMode = TOKEN_ATOM;
387 else if (cChar == '\\')
388 bEscaped = true;
389 break;
391 case TOKEN_ATOM:
392 if (cChar <= ' ' || cChar == 0x7F) // DEL
394 if (!bAddrSpec && !bEndsWithSpace)
396 aResult.append(' ');
397 bEndsWithSpace = true;
400 else if (cChar == '(')
402 if (!bAddrSpec && !bEndsWithSpace)
404 aResult.append(' ');
405 bEndsWithSpace = true;
407 eMode = TOKEN_COMMENT;
409 else
411 bEndsWithSpace = false;
412 if (cChar == '"')
414 if (bAddrSpec)
415 aResult.append(cChar);
416 eMode = TOKEN_QUOTED;
418 else if (cChar == '[')
420 aResult.append(cChar);
421 eMode = TOKEN_QUOTED;
423 else
424 aResult.append(cChar);
426 break;
429 return aResult.makeStringAndClear();
432 //============================================================================
433 // static
434 OUString SvAddressParser_Impl::reparseComment(sal_Unicode const * pBegin,
435 sal_Unicode const * pEnd)
437 OUStringBuffer aResult;
438 while (pBegin < pEnd)
440 sal_Unicode cChar = *pBegin++;
441 if (cChar == '\\')
442 cChar = *pBegin++;
443 aResult.append(cChar);
445 return aResult.makeStringAndClear();
448 //============================================================================
// Parses rInput as a list of addresses and stores every address found in
// pParser: the first one in pParser->m_aFirst, all further ones appended to
// pParser->m_aRest. Each stored address consists of an addr-spec string and
// a real-name string. The constructor returns once the whole input has been
// consumed and the last pending address has been flushed.
449 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser * pParser,
450 const OUString& rInput)
452 m_pInputPos = rInput.getStr();
453 m_pInputEnd = m_pInputPos + rInput.getLength();
455 reset();
456 bool bDone = false;
457 for (;;)
// No further token could be read: synthesize a closing '>' when inside angle
// brackets, otherwise a final ',' that flushes the pending address and ends
// the loop.
459 if (!readToken())
461 m_bRealNameFinished = true;
462 if (m_eState == AFTER_LESS)
463 m_nCurToken = '>';
464 else
466 m_nCurToken = ',';
467 bDone = true;
470 switch (m_nCurToken)
// Quoted string: becomes an item of the addr-spec candidate. The candidate
// is restarted first if it already contains an '@' or does not currently
// end in an item.
472 case TOKEN_QUOTED:
473 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
475 if (m_pAddrSpec->m_bAtFound
476 || m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
477 m_pAddrSpec->reset();
478 addTokenToAddrSpec(ELEMENT_ITEM);
// Real-name bookkeeping: a single quoted string without escapes that starts
// the real name is recorded by its content (quotes excluded); every other
// combination marks the real name for reparsing.
480 if (!m_bRealNameFinished && m_eState != AFTER_LESS)
482 if (m_bCurTokenReparse)
484 if (!m_pRealNameBegin)
485 m_pRealNameBegin = m_pCurTokenBegin;
486 m_pRealNameEnd = m_pCurTokenEnd;
487 m_bRealNameReparse = true;
489 else if (m_bRealNameReparse)
490 m_pRealNameEnd = m_pCurTokenEnd;
491 else if (!m_pRealNameBegin)
493 m_pRealNameBegin = m_pCurTokenBegin;
494 m_pRealNameContentBegin = m_pCurTokenContentBegin;
495 m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenContentEnd;
497 else
499 m_pRealNameEnd = m_pCurTokenEnd;
500 m_bRealNameReparse = true;
503 m_eType = TOKEN_ATOM;
504 break;
// Domain literal: accepted only when an '@' has been seen and the candidate
// currently ends in a delimiter; otherwise the candidate is discarded.
506 case TOKEN_DOMAIN:
507 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
509 if (m_pAddrSpec->m_bAtFound && m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
510 addTokenToAddrSpec(ELEMENT_ITEM);
511 else
512 m_pAddrSpec->reset();
514 addTokenToRealName();
515 m_eType = TOKEN_ATOM;
516 break;
// Comment: only the first non-empty comment seen outside angle brackets
// (and before the real name is finished) is remembered.
518 case TOKEN_COMMENT:
519 if (!m_bRealNameFinished && m_eState != AFTER_LESS
520 && !m_pFirstCommentBegin && m_pCurTokenContentBegin)
522 m_pFirstCommentBegin = m_pCurTokenContentBegin;
523 m_pFirstCommentEnd = m_pCurTokenContentEnd;
524 m_bFirstCommentReparse = m_bCurTokenReparse;
526 m_eType = TOKEN_ATOM;
527 break;
// Atom: extends the candidate after a delimiter, otherwise starts a new one.
529 case TOKEN_ATOM:
530 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
532 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
533 m_pAddrSpec->reset();
534 addTokenToAddrSpec(ELEMENT_ITEM);
536 addTokenToRealName();
537 break;
// '(' only selects the token kind that the next readToken() call reads.
539 case '(':
540 m_eType = TOKEN_COMMENT;
541 break;
// Stray ')', '\\' or ']': ends the current candidate; still part of the
// real name.
543 case ')':
544 case '\\':
545 case ']':
546 m_pAddrSpec->finish();
547 addTokenToRealName();
548 break;
// '<': the first one switches collection to the inner (angle-bracket)
// candidate; what was collected before it is the real name.
550 case '<':
551 switch (m_eState)
553 case BEFORE_COLON:
554 case BEFORE_LESS:
555 m_aOuterAddrSpec.finish();
556 if (m_pRealNameBegin)
557 m_bRealNameFinished = true;
558 m_pAddrSpec = &m_aInnerAddrSpec;
559 m_eState = AFTER_LESS;
560 break;
562 case AFTER_LESS:
563 m_aInnerAddrSpec.finish();
564 break;
566 case AFTER_GREATER:
567 m_aOuterAddrSpec.finish();
568 addTokenToRealName();
569 break;
571 break;
// '>': closes the inner candidate. If that candidate is valid, the outer
// one is frozen so that later tokens cannot modify it.
573 case '>':
574 if (m_eState == AFTER_LESS)
576 m_aInnerAddrSpec.finish();
577 if (m_aInnerAddrSpec.isValid())
578 m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
579 m_pAddrSpec = &m_aOuterAddrSpec;
580 m_eState = AFTER_GREATER;
582 else
584 m_aOuterAddrSpec.finish();
585 addTokenToRealName();
587 break;
// '@': allowed once per candidate and only directly after an item.
589 case '@':
590 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
592 if (!m_pAddrSpec->m_bAtFound
593 && m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
595 addTokenToAddrSpec(ELEMENT_DELIM);
596 m_pAddrSpec->m_bAtFound = true;
598 else
599 m_pAddrSpec->reset();
601 addTokenToRealName();
602 break;
// ',' and ';': inside angle brackets they only affect the inner candidate;
// outside they terminate the current address, which is then stored.
604 case ',':
605 case ';':
606 if (m_eState == AFTER_LESS)
607 if (m_nCurToken == ',')
609 if (m_aInnerAddrSpec.m_eLastElem != ELEMENT_END)
610 m_aInnerAddrSpec.reset();
612 else
613 m_aInnerAddrSpec.finish();
614 else
// Choose the candidate to report: a valid inner one first; an inner one that
// is only poorly valid if the outer one is not valid; else a poorly valid
// outer one; else nothing.
616 if(m_aInnerAddrSpec.isValid() || (!m_aOuterAddrSpec.isValid() && m_aInnerAddrSpec.isPoorlyValid()))
618 m_pAddrSpec = &m_aInnerAddrSpec;
620 else if(m_aOuterAddrSpec.isPoorlyValid())
622 m_pAddrSpec = &m_aOuterAddrSpec;
624 else
626 m_pAddrSpec = 0;
629 if (m_pAddrSpec)
// Addr-spec text: reparsed if its tokens were not contiguous, otherwise
// copied from the input as is.
631 OUString aTheAddrSpec;
632 if (m_pAddrSpec->m_bReparse)
633 aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin, m_pAddrSpec->m_pEnd, true);
634 else
636 sal_Int32 nLen = ( m_pAddrSpec->m_pEnd - m_pAddrSpec->m_pBegin);
637 if (nLen == rInput.getLength())
638 aTheAddrSpec = rInput;
639 else
640 aTheAddrSpec = rInput.copy( (m_pAddrSpec->m_pBegin - rInput.getStr()),
641 nLen);
// Real-name text: if there is no real name, or the real name is exactly the
// outer addr-spec and a comment exists, use the first comment (or, without
// a comment, the addr-spec itself). Otherwise use the collected real name.
643 OUString aTheRealName;
644 if (!m_pRealNameBegin ||
645 (m_pAddrSpec == &m_aOuterAddrSpec &&
646 m_pRealNameBegin == m_aOuterAddrSpec.m_pBegin &&
647 m_pRealNameEnd == m_aOuterAddrSpec.m_pEnd &&
648 m_pFirstCommentBegin))
650 if (!m_pFirstCommentBegin)
651 aTheRealName = aTheAddrSpec;
652 else if (m_bFirstCommentReparse)
653 aTheRealName = reparseComment(m_pFirstCommentBegin,
654 m_pFirstCommentEnd);
655 else
656 aTheRealName = rInput.copy( (m_pFirstCommentBegin - rInput.getStr()),
657 (m_pFirstCommentEnd - m_pFirstCommentBegin));
659 else if (m_bRealNameReparse)
660 aTheRealName = reparse(m_pRealNameBegin, m_pRealNameEnd, false);
661 else
663 sal_Int32 nLen = (m_pRealNameContentEnd - m_pRealNameContentBegin);
664 if (nLen == rInput.getLength())
665 aTheRealName = rInput;
666 else
667 aTheRealName = rInput.copy( (m_pRealNameContentBegin - rInput.getStr()), nLen);
// Store the result: first address in m_aFirst, later ones as new entries
// appended to m_aRest.
669 if (pParser->m_bHasFirst)
670 pParser->m_aRest.push_back(new SvAddressEntry_Impl( aTheAddrSpec,
671 aTheRealName) );
672 else
674 pParser->m_bHasFirst = true;
675 pParser->m_aFirst.m_aAddrSpec = aTheAddrSpec;
676 pParser->m_aFirst.m_aRealName = aTheRealName;
// Stop after the synthesized final ','; otherwise start over for the next
// address in the list.
679 if (bDone)
680 return;
681 reset();
683 break;
// ':': the first one discards everything collected before it; later ones
// end the current candidate.
685 case ':':
686 switch (m_eState)
688 case BEFORE_COLON:
689 m_aOuterAddrSpec.reset();
690 resetRealNameAndFirstComment();
691 m_eState = BEFORE_LESS;
692 break;
694 case BEFORE_LESS:
695 case AFTER_GREATER:
696 m_aOuterAddrSpec.finish();
697 addTokenToRealName();
698 break;
700 case AFTER_LESS:
701 m_aInnerAddrSpec.reset();
702 break;
704 break;
// '"' only selects the token kind that the next readToken() call reads.
706 case '"':
707 m_eType = TOKEN_QUOTED;
708 break;
// '.': a delimiter; two delimiters in a row discard the candidate.
710 case '.':
711 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
713 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
714 addTokenToAddrSpec(ELEMENT_DELIM);
715 else
716 m_pAddrSpec->reset();
718 addTokenToRealName();
719 break;
// '[' only selects the token kind that the next readToken() call reads.
721 case '[':
722 m_eType = TOKEN_DOMAIN;
723 break;
728 //============================================================================
730 // SvAddressParser
732 //============================================================================
734 SvAddressParser::SvAddressParser(const OUString& rInput)
735 : m_bHasFirst(false)
737 SvAddressParser_Impl aDoParse(this, rInput);
740 //============================================================================
741 SvAddressParser::~SvAddressParser()
743 for ( size_t i = m_aRest.size(); i > 0; )
744 delete m_aRest[ --i ];
745 m_aRest.clear();
748 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */