1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: adrparse.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_svtools.hxx"
33 #include <tools/inetmime.hxx>
34 #include <adrparse.hxx>
// Declares an (empty) named namespace and immediately pulls it into scope
// with a using-directive. Per the note below, this stands in for an unnamed
// namespace.
36 namespace unnamed_svtools_adrparse
{}
37 using namespace unnamed_svtools_adrparse
;
38 // unnamed namespaces don't work well yet
40 //============================================================================
41 namespace unnamed_svtools_adrparse
{
// Kind of the element most recently recorded for an addr-spec candidate
// (stored in ParsedAddrSpec::m_eLastElem).
// The declaration order is significant: ParsedAddrSpec::isPoorlyValid()
// tests m_eLastElem >= ELEMENT_ITEM, and the parser also uses
// m_eLastElem <= ELEMENT_DELIM, so the enumerators must keep this order.
43 enum ElementType
{ ELEMENT_START
, ELEMENT_DELIM
, ELEMENT_ITEM
, ELEMENT_END
};
45 //============================================================================
// Members of ParsedAddrSpec (the enclosing struct/class header is not
// visible in this excerpt).
//
// m_pBegin / m_pEnd: pointers delimiting the addr-spec candidate.
// SvAddressParser_Impl::addTokenToAddrSpec() sets m_pBegin from the first
// token's begin and m_pEnd from the end of each token added.
48 sal_Unicode
const * m_pBegin
;
49 sal_Unicode
const * m_pEnd
;
// Element type of the last token recorded (see ElementType).
50 ElementType m_eLastElem
;
// A freshly constructed object starts in the reset() state.
54 ParsedAddrSpec() { reset(); }
// True when the last recorded element is ELEMENT_ITEM or ELEMENT_END.
56 bool isPoorlyValid() const { return m_eLastElem
>= ELEMENT_ITEM
; }
// True when isPoorlyValid() holds and the m_bAtFound flag is set as well.
58 bool isValid() const { return isPoorlyValid() && m_bAtFound
; }
// Returns the addr-spec candidate to its initial state. Of the body, only
// the assignment m_eLastElem = ELEMENT_START is visible in this excerpt;
// NOTE(review): the remaining member resets are presumably on lines that
// are not shown here -- confirm against the full file.
65 inline void ParsedAddrSpec::reset()
69 m_eLastElem
= ELEMENT_START
;
// Marks the addr-spec candidate as closed by setting m_eLastElem to
// ELEMENT_END. The parser checks m_eLastElem != ELEMENT_END before it adds
// further tokens to an addr-spec.
// NOTE(review): any other statements of the body are not visible here.
74 inline void ParsedAddrSpec::finish()
77 m_eLastElem
= ELEMENT_END
;
84 //============================================================================
// Implementation helper for SvAddressParser. It combines a tokenizer
// (readToken) with the bookkeeping needed to pick an addr-spec and a real
// name out of the input; the work is driven from the constructor, which
// stores its results in the SvAddressParser passed to it.
85 class SvAddressParser_Impl
// Parser states held in m_eState. reset() starts in BEFORE_COLON; the
// constructor switches to AFTER_LESS when it starts filling
// m_aInnerAddrSpec and to AFTER_GREATER when it returns to
// m_aOuterAddrSpec.
87 enum State
{ BEFORE_COLON
, BEFORE_LESS
, AFTER_LESS
, AFTER_GREATER
};
// Token kinds stored in m_nCurToken / m_eType. The values start at
// 0x80000000; the constructor also compares m_nCurToken with a plain
// character (','), so single-character tokens are presumably stored as the
// character code itself, below this range -- TODO confirm.
89 enum TokenType
{ TOKEN_QUOTED
= 0x80000000, TOKEN_DOMAIN
, TOKEN_COMMENT
,
// Current read position and end of the input buffer (set up in the
// constructor from rInput.GetBuffer() and rInput.Len()).
92 sal_Unicode
const * m_pInputPos
;
93 sal_Unicode
const * m_pInputEnd
;
// Description of the token produced by the last readToken() call: its
// kind, its full extent, the extent of its content, and whether its text
// has to be reparsed before use.
94 sal_uInt32 m_nCurToken
;
95 sal_Unicode
const * m_pCurTokenBegin
;
96 sal_Unicode
const * m_pCurTokenEnd
;
97 sal_Unicode
const * m_pCurTokenContentBegin
;
98 sal_Unicode
const * m_pCurTokenContentEnd
;
99 bool m_bCurTokenReparse
;
// Two addr-spec candidates; m_pAddrSpec points at whichever of the two is
// currently being filled (reset() selects the outer one).
100 ParsedAddrSpec m_aOuterAddrSpec
;
101 ParsedAddrSpec m_aInnerAddrSpec
;
102 ParsedAddrSpec
* m_pAddrSpec
;
// Extent of the real name collected so far (full extent and content
// extent), whether it must be reparsed, and whether collection has ended.
103 sal_Unicode
const * m_pRealNameBegin
;
104 sal_Unicode
const * m_pRealNameEnd
;
105 sal_Unicode
const * m_pRealNameContentBegin
;
106 sal_Unicode
const * m_pRealNameContentEnd
;
107 bool m_bRealNameReparse
;
108 bool m_bRealNameFinished
;
// Extent of the first comment encountered and whether it needs reparsing.
109 sal_Unicode
const * m_pFirstCommentBegin
;
110 sal_Unicode
const * m_pFirstCommentEnd
;
111 bool m_bFirstCommentReparse
;
// Clears the real-name and first-comment members.
115 inline void resetRealNameAndFirstComment();
// Extends the current addr-spec candidate by the current token.
119 inline void addTokenToAddrSpec(ElementType eTokenElem
);
// Extends the real name by the current token.
121 inline void addTokenToRealName();
// Builds a string from [pBegin, pEnd); bAddrSpec selects addr-spec
// handling as opposed to real-name handling.
125 static UniString
reparse(sal_Unicode
const * pBegin
,
126 sal_Unicode
const * pEnd
, bool bAddrSpec
);
// Builds a string from the comment text in [pBegin, pEnd).
128 static UniString
reparseComment(sal_Unicode
const * pBegin
,
129 sal_Unicode
const * pEnd
);
// Parses rInput and stores the results in *pParser.
132 SvAddressParser_Impl(SvAddressParser
* pParser
, UniString
const & rInput
);
// Clears the real-name and first-comment bookkeeping: the pointers are set
// to 0 and the flags to false.
135 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
137 m_pRealNameBegin
= 0;
139 m_pRealNameContentBegin
= 0;
140 m_pRealNameContentEnd
= 0;
141 m_bRealNameReparse
= false;
142 m_bRealNameFinished
= false;
143 m_pFirstCommentBegin
= 0;
144 m_pFirstCommentEnd
= 0;
145 m_bFirstCommentReparse
= false;
// Puts the parser into its start configuration: both addr-spec candidates
// are reset, the outer one becomes the current one, the real name and
// first comment are cleared, the state is BEFORE_COLON and the next token
// is read as TOKEN_ATOM.
148 inline void SvAddressParser_Impl::reset()
150 m_aOuterAddrSpec
.reset();
151 m_aInnerAddrSpec
.reset();
152 m_pAddrSpec
= &m_aOuterAddrSpec
;
153 resetRealNameAndFirstComment();
154 m_eState
= BEFORE_COLON
;
155 m_eType
= TOKEN_ATOM
;
// Extends the current addr-spec candidate (*m_pAddrSpec) by the current
// token and records eTokenElem as the type of its last element.
158 inline void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem
)
// First token of the candidate: remember where the candidate starts.
160 if (!m_pAddrSpec
->m_pBegin
)
161 m_pAddrSpec
->m_pBegin
= m_pCurTokenBegin
;
// Otherwise, if there is a gap between the previous end and this token,
// the raw input span cannot be used as is and must be reparsed later.
162 else if (m_pAddrSpec
->m_pEnd
< m_pCurTokenBegin
)
163 m_pAddrSpec
->m_bReparse
= true;
164 m_pAddrSpec
->m_pEnd
= m_pCurTokenEnd
;
165 m_pAddrSpec
->m_eLastElem
= eTokenElem
;
// Extends the real name by the current token. Does nothing once the real
// name is finished or while the parser is in state AFTER_LESS.
168 inline void SvAddressParser_Impl::addTokenToRealName()
170 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
// First token: the real name and its content both start at this token.
172 if (!m_pRealNameBegin
)
173 m_pRealNameBegin
= m_pRealNameContentBegin
= m_pCurTokenBegin
;
// Later token: the raw span stays usable only if this token follows the
// previous end directly, or is separated from it by exactly one ' '.
// Any larger gap, or a single separator other than ' ', forces a reparse.
174 else if (m_pRealNameEnd
< m_pCurTokenBegin
- 1
175 || (m_pRealNameEnd
== m_pCurTokenBegin
- 1
176 && *m_pRealNameEnd
!= ' '))
177 m_bRealNameReparse
= true;
178 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenEnd
;
182 //============================================================================
184 // SvAddressParser_Impl
186 //============================================================================
// Reads the next token from [m_pInputPos, m_pInputEnd) and describes it in
// the m_nCurToken / m_pCurToken* / m_bCurTokenReparse members.
// NOTE(review): the switch/case labels, loops and return statements of this
// function are not visible in this excerpt; the section comments below are
// inferred from the delimiter characters that are tested and should be
// confirmed against the full file.
188 bool SvAddressParser_Impl::readToken()
// The token starts out tagged with the currently expected token type.
190 m_nCurToken
= m_eType
;
191 m_bCurTokenReparse
= false;
// --- Section terminated by '"' (presumably the TOKEN_QUOTED case) ---
// The token begins one character before the read position; its content
// begins at the read position.
196 m_pCurTokenBegin
= m_pInputPos
- 1;
197 m_pCurTokenContentBegin
= m_pInputPos
;
198 bool bEscaped
= false;
201 if (m_pInputPos
>= m_pInputEnd
)
203 sal_Unicode cChar
= *m_pInputPos
++;
206 m_bCurTokenReparse
= true;
209 else if (cChar
== '"')
// The token ends after the closing '"'; the content ends before it.
211 m_pCurTokenEnd
= m_pInputPos
;
212 m_pCurTokenContentEnd
= m_pInputPos
- 1;
215 else if (cChar
== '\\')
// --- Section terminated by ']' (presumably the TOKEN_DOMAIN case) ---
222 m_pCurTokenBegin
= m_pInputPos
- 1;
223 m_pCurTokenContentBegin
= m_pInputPos
;
224 bool bEscaped
= false;
227 if (m_pInputPos
>= m_pInputEnd
)
229 sal_Unicode cChar
= *m_pInputPos
++;
232 else if (cChar
== ']')
234 m_pCurTokenEnd
= m_pInputPos
;
237 else if (cChar
== '\\')
// --- Section handling '(' and ')' with a nesting level (presumably the
// TOKEN_COMMENT case) ---
// The content extent starts out empty (both pointers 0) and is grown as
// characters are consumed.
244 m_pCurTokenBegin
= m_pInputPos
- 1;
245 m_pCurTokenContentBegin
= 0;
246 m_pCurTokenContentEnd
= 0;
247 bool bEscaped
= false;
248 xub_StrLen nLevel
= 0;
251 if (m_pInputPos
>= m_pInputEnd
)
253 sal_Unicode cChar
= *m_pInputPos
++;
256 m_bCurTokenReparse
= true;
257 m_pCurTokenContentEnd
= m_pInputPos
;
260 else if (cChar
== '(')
262 if (!m_pCurTokenContentBegin
)
263 m_pCurTokenContentBegin
= m_pInputPos
- 1;
264 m_pCurTokenContentEnd
= m_pInputPos
;
267 else if (cChar
== ')')
270 m_pCurTokenContentEnd
= m_pInputPos
;
275 else if (cChar
== '\\')
277 if (!m_pCurTokenContentBegin
)
278 m_pCurTokenContentBegin
= m_pInputPos
- 1;
// Only characters above ' ' other than DEL extend the content extent.
281 else if (cChar
> ' ' && cChar
!= 0x7F) // DEL
283 if (!m_pCurTokenContentBegin
)
284 m_pCurTokenContentBegin
= m_pInputPos
- 1;
285 m_pCurTokenContentEnd
= m_pInputPos
;
// --- Remaining section (presumably the default/atom case) ---
// Characters are consumed until one above ' ' other than DEL is found.
295 if (m_pInputPos
>= m_pInputEnd
)
297 cChar
= *m_pInputPos
++;
298 if (cChar
> ' ' && cChar
!= 0x7F) // DEL
301 m_pCurTokenBegin
= m_pInputPos
- 1;
// One of the special characters listed here: the token ends right after
// this single character.
302 if (cChar
== '"' || cChar
== '(' || cChar
== ')' || cChar
== ','
303 || cChar
== '.' || cChar
== ':' || cChar
== ';'
304 || cChar
== '<' || cChar
== '>' || cChar
== '@'
305 || cChar
== '[' || cChar
== '\\' || cChar
== ']')
308 m_pCurTokenEnd
= m_pInputPos
;
// End of input reached: the token ends at the end of the input.
314 if (m_pInputPos
>= m_pInputEnd
)
316 m_pCurTokenEnd
= m_pInputPos
;
319 cChar
= *m_pInputPos
++;
// A character <= ' ', a special character or DEL terminates the token;
// the read position is stepped back so that this character is read again
// by the next call.
320 if (cChar
<= ' ' || cChar
== '"' || cChar
== '('
321 || cChar
== ')' || cChar
== ',' || cChar
== '.'
322 || cChar
== ':' || cChar
== ';' || cChar
== '<'
323 || cChar
== '>' || cChar
== '@' || cChar
== '['
324 || cChar
== '\\' || cChar
== ']'
325 || cChar
== 0x7F) // DEL
327 m_pCurTokenEnd
= --m_pInputPos
;
335 //============================================================================
// Walks the characters in [pBegin, pEnd) with a small mode machine (eMode
// starts as TOKEN_ATOM and switches to TOKEN_QUOTED or TOKEN_COMMENT) and
// returns a UniString built from them.
// NOTE(review): the switch labels and the statements that append to the
// result are not visible in this excerpt; the third parameter (bAddrSpec,
// per the class declaration) is tested below but its declaration line is
// missing here.
337 UniString
SvAddressParser_Impl::reparse(sal_Unicode
const * pBegin
,
338 sal_Unicode
const * pEnd
,
342 TokenType eMode
= TOKEN_ATOM
;
343 bool bEscaped
= false;
344 bool bEndsWithSpace
= false;
345 xub_StrLen nLevel
= 0;
346 while (pBegin
< pEnd
)
348 sal_Unicode cChar
= *pBegin
++;
357 else if (cChar
== '"')
363 else if (cChar
== '\\')
379 else if (cChar
== ']')
384 else if (cChar
== '\\')
397 else if (cChar
== '(')
399 else if (cChar
== ')')
404 else if (cChar
== '\\')
// Characters <= ' ' and DEL: when not producing an addr-spec,
// bEndsWithSpace guards against acting on consecutive such characters
// more than once (the guarded statement itself is not visible here).
409 if (cChar
<= ' ' || cChar
== 0x7F) // DEL
411 if (!bAddrSpec
&& !bEndsWithSpace
)
414 bEndsWithSpace
= true;
// '(' receives the same bEndsWithSpace treatment and then switches to
// TOKEN_COMMENT mode.
417 else if (cChar
== '(')
419 if (!bAddrSpec
&& !bEndsWithSpace
)
422 bEndsWithSpace
= true;
424 eMode
= TOKEN_COMMENT
;
428 bEndsWithSpace
= false;
433 eMode
= TOKEN_QUOTED
;
435 else if (cChar
== '[')
438 eMode
= TOKEN_QUOTED
;
449 //============================================================================
// Iterates over the characters in [pBegin, pEnd) and returns a UniString.
// NOTE(review): only the loop header and the character fetch are visible in
// this excerpt; how each character is turned into output is not shown.
451 UniString
SvAddressParser_Impl::reparseComment(sal_Unicode
const * pBegin
,
452 sal_Unicode
const * pEnd
)
455 while (pBegin
< pEnd
)
457 sal_Unicode cChar
= *pBegin
++;
465 //============================================================================
// Parses rInput and stores the results in *pParser: the first address goes
// to pParser->m_aFirst, later ones are inserted into pParser->m_aRest.
// NOTE(review): the token loop, the switch on the token and all case labels
// are missing from this excerpt. The comments below describe only what the
// visible statements do; which token triggers each group must be confirmed
// against the full file.
466 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser
* pParser
,
467 UniString
const & rInput
)
// The input range is the buffer of rInput, rInput.Len() characters long.
469 m_pInputPos
= rInput
.GetBuffer();
470 m_pInputEnd
= m_pInputPos
+ rInput
.Len();
478 m_bRealNameFinished
= true;
479 if (m_eState
== AFTER_LESS
)
// Group 1: unless the current addr-spec is already finished, it is reset
// first if m_bAtFound is set or its last element is ELEMENT_START or
// ELEMENT_DELIM; the token is then added as an ELEMENT_ITEM.
490 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
492 if (m_pAddrSpec
->m_bAtFound
493 || m_pAddrSpec
->m_eLastElem
<= ELEMENT_DELIM
)
494 m_pAddrSpec
->reset();
495 addTokenToAddrSpec(ELEMENT_ITEM
);
// Real-name update that distinguishes the token's full extent from its
// content extent: as long as no reparse is needed and this is the first
// token, the content extent is recorded; otherwise the full extent is used
// and the real name is flagged for reparsing.
497 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
499 if (m_bCurTokenReparse
)
501 if (!m_pRealNameBegin
)
502 m_pRealNameBegin
= m_pCurTokenBegin
;
503 m_pRealNameEnd
= m_pCurTokenEnd
;
504 m_bRealNameReparse
= true;
506 else if (m_bRealNameReparse
)
507 m_pRealNameEnd
= m_pCurTokenEnd
;
508 else if (!m_pRealNameBegin
)
510 m_pRealNameBegin
= m_pCurTokenBegin
;
511 m_pRealNameContentBegin
= m_pCurTokenContentBegin
;
512 m_pRealNameEnd
= m_pRealNameContentEnd
513 = m_pCurTokenContentEnd
;
517 m_pRealNameEnd
= m_pCurTokenEnd
;
518 m_bRealNameReparse
= true;
// The next token is read as an atom again.
521 m_eType
= TOKEN_ATOM
;
// Group 2: the token is added as an ELEMENT_ITEM only if m_bAtFound is set
// and the last element is ELEMENT_DELIM; the reset() below is presumably
// the else branch (the "else" line is not visible).
525 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
527 if (m_pAddrSpec
->m_bAtFound
528 && m_pAddrSpec
->m_eLastElem
== ELEMENT_DELIM
)
529 addTokenToAddrSpec(ELEMENT_ITEM
);
531 m_pAddrSpec
->reset();
533 addTokenToRealName();
534 m_eType
= TOKEN_ATOM
;
// Group 3: remembers the first comment that has content, provided the real
// name is still being collected and the state is not AFTER_LESS.
538 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
539 && !m_pFirstCommentBegin
&& m_pCurTokenContentBegin
)
541 m_pFirstCommentBegin
= m_pCurTokenContentBegin
;
542 m_pFirstCommentEnd
= m_pCurTokenContentEnd
;
543 m_bFirstCommentReparse
= m_bCurTokenReparse
;
545 m_eType
= TOKEN_ATOM
;
// Group 4: the addr-spec is reset unless its last element is
// ELEMENT_DELIM, then the token is added as an ELEMENT_ITEM.
549 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
551 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
552 m_pAddrSpec
->reset();
553 addTokenToAddrSpec(ELEMENT_ITEM
);
555 addTokenToRealName();
// The next token is read as a comment.
559 m_eType
= TOKEN_COMMENT
;
565 m_pAddrSpec
->finish();
566 addTokenToRealName();
// Switch to the inner addr-spec: the outer one is finished, a real name
// that has already started is closed, and the state becomes AFTER_LESS.
574 m_aOuterAddrSpec
.finish();
575 if (m_pRealNameBegin
)
576 m_bRealNameFinished
= true;
577 m_pAddrSpec
= &m_aInnerAddrSpec
;
578 m_eState
= AFTER_LESS
;
582 m_aInnerAddrSpec
.finish();
586 m_aOuterAddrSpec
.finish();
587 addTokenToRealName();
// Leaving AFTER_LESS: the inner addr-spec is finished; if it is valid, the
// outer one is marked as ended too. The outer addr-spec becomes current
// again and the state becomes AFTER_GREATER.
593 if (m_eState
== AFTER_LESS
)
595 m_aInnerAddrSpec
.finish();
596 if (m_aInnerAddrSpec
.isValid())
597 m_aOuterAddrSpec
.m_eLastElem
= ELEMENT_END
;
598 m_pAddrSpec
= &m_aOuterAddrSpec
;
599 m_eState
= AFTER_GREATER
;
603 m_aOuterAddrSpec
.finish();
604 addTokenToRealName();
// Group setting m_bAtFound: only when no earlier one was recorded and the
// last element is an ELEMENT_ITEM is the token added as an ELEMENT_DELIM
// and m_bAtFound set; the reset() below is presumably the else branch.
609 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
611 if (!m_pAddrSpec
->m_bAtFound
612 && m_pAddrSpec
->m_eLastElem
== ELEMENT_ITEM
)
614 addTokenToAddrSpec(ELEMENT_DELIM
);
615 m_pAddrSpec
->m_bAtFound
= true;
618 m_pAddrSpec
->reset();
620 addTokenToRealName();
// Group testing m_nCurToken == ',' while in AFTER_LESS.
625 if (m_eState
== AFTER_LESS
)
626 if (m_nCurToken
== ',')
628 if (m_aInnerAddrSpec
.m_eLastElem
630 m_aInnerAddrSpec
.reset();
633 m_aInnerAddrSpec
.finish();
// Choose which candidate to report. The visible conditions are: the inner
// one is valid, or the outer one is not valid while the inner one is at
// least poorly valid; the fallbacks are the outer one if it is poorly
// valid, and otherwise 0. (The line naming the first result of the
// conditional expression is not visible.)
636 m_pAddrSpec
= m_aInnerAddrSpec
.isValid()
637 || (!m_aOuterAddrSpec
.isValid()
638 && m_aInnerAddrSpec
.isPoorlyValid()) ?
640 m_aOuterAddrSpec
.isPoorlyValid() ?
641 &m_aOuterAddrSpec
: 0;
// Build the addr-spec string: via reparse() when flagged; otherwise from
// rInput, reusing rInput itself when the span covers the entire input.
644 UniString aTheAddrSpec
;
645 if (m_pAddrSpec
->m_bReparse
)
646 aTheAddrSpec
= reparse(m_pAddrSpec
->m_pBegin
,
647 m_pAddrSpec
->m_pEnd
, true);
651 sal::static_int_cast
< xub_StrLen
>(
653 - m_pAddrSpec
->m_pBegin
);
654 if (nLen
== rInput
.Len())
655 aTheAddrSpec
= rInput
;
659 sal::static_int_cast
< xub_StrLen
>(
660 m_pAddrSpec
->m_pBegin
661 - rInput
.GetBuffer()),
// Build the real-name string. Without a usable real name, the addr-spec
// itself is used when there is no first comment; when there is one, it is
// passed through reparseComment() if flagged for reparsing (the unflagged
// branch is only partly visible). Otherwise the real name is taken via
// reparse() or directly from rInput.
664 UniString aTheRealName
;
665 if (!m_pRealNameBegin
666 || (m_pAddrSpec
== &m_aOuterAddrSpec
668 == m_aOuterAddrSpec
.m_pBegin
669 && m_pRealNameEnd
== m_aOuterAddrSpec
.m_pEnd
670 && m_pFirstCommentBegin
))
671 if (!m_pFirstCommentBegin
)
672 aTheRealName
= aTheAddrSpec
;
673 else if (m_bFirstCommentReparse
)
675 = reparseComment(m_pFirstCommentBegin
,
680 sal::static_int_cast
< xub_StrLen
>(
682 - rInput
.GetBuffer()),
683 sal::static_int_cast
< xub_StrLen
>(
685 - m_pFirstCommentBegin
));
686 else if (m_bRealNameReparse
)
687 aTheRealName
= reparse(m_pRealNameBegin
,
688 m_pRealNameEnd
, false);
692 sal::static_int_cast
< xub_StrLen
>(
693 m_pRealNameContentEnd
694 - m_pRealNameContentBegin
);
695 if (nLen
== rInput
.Len())
696 aTheRealName
= rInput
;
700 sal::static_int_cast
< xub_StrLen
>(
701 m_pRealNameContentBegin
702 - rInput
.GetBuffer()),
// Store the result: if a first entry already exists, a new
// SvAddressEntry_Impl is inserted into m_aRest; the assignments to
// m_aFirst below are presumably the else branch for the very first entry.
705 if (pParser
->m_bHasFirst
)
706 pParser
->m_aRest
.Insert(new SvAddressEntry_Impl(
712 pParser
->m_bHasFirst
= true;
713 pParser
->m_aFirst
.m_aAddrSpec
= aTheAddrSpec
;
714 pParser
->m_aFirst
.m_aRealName
= aTheRealName
;
727 m_aOuterAddrSpec
.reset();
728 resetRealNameAndFirstComment();
729 m_eState
= BEFORE_LESS
;
734 m_aOuterAddrSpec
.finish();
735 addTokenToRealName();
739 m_aInnerAddrSpec
.reset();
// The next token is read as a quoted string.
745 m_eType
= TOKEN_QUOTED
;
// Group adding a delimiter: the token is added as an ELEMENT_DELIM unless
// the last element already is one; the reset() below is presumably the
// else branch.
749 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
751 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
752 addTokenToAddrSpec(ELEMENT_DELIM
);
754 m_pAddrSpec
->reset();
756 addTokenToRealName();
// The next token is read as a domain literal.
760 m_eType
= TOKEN_DOMAIN
;
766 //============================================================================
770 //============================================================================
// Parses rInput. m_bHasFirst starts out false; a temporary
// SvAddressParser_Impl is constructed with this object and rInput, and its
// constructor stores the parse results in this object.
772 SvAddressParser::SvAddressParser(UniString
const & rInput
): m_bHasFirst(false)
774 SvAddressParser_Impl(this, rInput
);
777 //============================================================================
// Deletes every entry held in m_aRest, removing them from the back
// (index Count() - 1 down to 0).
778 SvAddressParser::~SvAddressParser()
780 for (ULONG i
= m_aRest
.Count(); i
!= 0;)
781 delete m_aRest
.Remove(--i
);
784 //============================================================================
// Builds a mailbox string from a phrase (rPhrase) and an addr-spec
// (rAddrSpec) and assigns it to rMailbox.
// NOTE(review): the remainder of the parameter list, the return statements
// and much of the control flow are missing from this excerpt; the comments
// below cover only the visible statements.
786 bool SvAddressParser::createRFC822Mailbox(String
const & rPhrase
,
787 String
const & rAddrSpec
,
// p scans rAddrSpec; the normalized text is accumulated in aTheAddrSpec.
791 sal_Unicode
const * p
= rAddrSpec
.GetBuffer();
792 sal_Unicode
const * pEnd
= p
+ rAddrSpec
.Len();
// First scanning loop. Linear white space and comments are skipped between
// elements.
793 {for (bool bSegment
= false;;)
795 p
= INetMIME::skipLinearWhiteSpaceComment(p
, pEnd
);
800 sal_Unicode c
= *p
++;
806 p
= INetMIME::skipLinearWhiteSpaceComment(p
, pEnd
);
814 aTheAddrSpec
+= *p
++;
817 if (INetMIME::startsWithLineFolding(p
, pEnd
))
// Characters tested here: a CR, a backslash that is the last character of
// the input, or a non-US-ASCII character.
823 if (*p
== '\x0D' || (*p
== '\\' && ++p
== pEnd
)
824 || !INetMIME::isUSASCII(*p
))
// Characters that need it are emitted with a preceding backslash.
826 if (INetMIME::needsQuotedStringEscape(*p
))
827 aTheAddrSpec
+= '\\';
828 aTheAddrSpec
+= *p
++;
830 aTheAddrSpec
+= *p
++;
// A run of atom characters is copied unchanged.
832 else if (INetMIME::isAtomChar(*p
))
833 while (p
!= pEnd
&& INetMIME::isAtomChar(*p
))
834 aTheAddrSpec
+= *p
++;
// Second scanning loop, structured like the first one.
839 {for (bool bSegment
= false;;)
841 p
= INetMIME::skipLinearWhiteSpaceComment(p
, pEnd
);
854 p
= INetMIME::skipLinearWhiteSpaceComment(p
, pEnd
);
862 aTheAddrSpec
+= *p
++;
865 if (INetMIME::startsWithLineFolding(p
, pEnd
))
// Same test as in the first loop, with '[' added to the tested characters.
871 if (*p
== '\x0D' || *p
== '[' || (*p
== '\\' && ++p
== pEnd
)
872 || !INetMIME::isUSASCII(*p
))
// The range '[' .. ']' is exactly '[', '\\' and ']' in ASCII; these are
// emitted with a preceding backslash.
874 if (*p
>= '[' && *p
<= ']')
875 aTheAddrSpec
+= '\\';
876 aTheAddrSpec
+= *p
++;
878 aTheAddrSpec
+= *p
++;
880 else if (INetMIME::isAtomChar(*p
))
881 while (p
!= pEnd
&& INetMIME::isAtomChar(*p
))
882 aTheAddrSpec
+= *p
++;
// Without a phrase, the mailbox is just the normalized addr-spec.
887 if (rPhrase
.Len() == 0)
888 rMailbox
= aTheAddrSpec
;
// With a phrase: bQuotedString is set when the phrase contains a character
// that is not an atom character.
891 bool bQuotedString
= false;
892 p
= rPhrase
.GetBuffer();
893 pEnd
= p
+ rPhrase
.Len();
894 for (;p
!= pEnd
; ++p
)
895 if (!(INetMIME::isAtomChar(*p
)))
897 bQuotedString
= true;
// Second pass over the phrase, testing each character for the need of a
// quoted-string escape (the statements acting on the result are not
// visible here).
904 for (p
= rPhrase
.GetBuffer(); p
!= pEnd
; ++p
)
906 if (INetMIME::needsQuotedStringEscape(*p
))
// The phrase is followed by " <" and the addr-spec.
913 aTheMailbox
= rPhrase
;
914 aTheMailbox
.AppendAscii(RTL_CONSTASCII_STRINGPARAM(" <"));
915 aTheMailbox
+= aTheAddrSpec
;
917 rMailbox
= aTheMailbox
;