1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <tools/inetmime.hxx>
21 #include <rtl/ustrbuf.hxx>
22 #include <svl/adrparse.hxx>
// Kind of the element most recently added to an addr-spec (stored in
// ParsedAddrSpec::m_eLastElem). The declaration order START, DELIM, ITEM, END
// is significant: code in this file compares the enumerators relationally
// (e.g. `>= ELEMENT_ITEM`, `<= ELEMENT_DELIM`).
27 enum ElementType
{ ELEMENT_START
, ELEMENT_DELIM
, ELEMENT_ITEM
, ELEMENT_END
};
29 //============================================================================
// Start of the addr-spec in the input buffer; null until the first token is
// added (see SvAddressParser_Impl::addTokenToAddrSpec).
32 sal_Unicode
const * m_pBegin
;
// End of the last token added to the addr-spec.
33 sal_Unicode
const * m_pEnd
;
// Kind of the last element added; ELEMENT_START after reset(), ELEMENT_END
// after finish().
34 ElementType m_eLastElem
;
// A newly constructed addr-spec starts out in the reset() state.
38 ParsedAddrSpec() { reset(); }
// True when the last element is ELEMENT_ITEM or ELEMENT_END.
40 bool isPoorlyValid() const { return m_eLastElem
>= ELEMENT_ITEM
; }
// Stricter than isPoorlyValid(): additionally requires m_bAtFound to be set.
42 bool isValid() const { return isPoorlyValid() && m_bAtFound
; }
// Resets the parse state of this addr-spec; m_eLastElem goes back to
// ELEMENT_START, so isPoorlyValid() and isValid() report false afterwards.
49 inline void ParsedAddrSpec::reset()
53 m_eLastElem
= ELEMENT_START
;
// Marks this addr-spec as finished by setting m_eLastElem to ELEMENT_END.
// Callers in this file test `m_eLastElem != ELEMENT_END` before adding
// further tokens.
58 inline void ParsedAddrSpec::finish()
61 m_eLastElem
= ELEMENT_END
;
68 //============================================================================
// Implementation helper that tokenizes an input string and extracts address
// entries (addr-spec plus real name) into the SvAddressParser passed to its
// constructor.
69 class SvAddressParser_Impl
// Position of the parser relative to the ':' / '<' / '>' characters of the
// address currently being read.
71 enum State
{ BEFORE_COLON
, BEFORE_LESS
, AFTER_LESS
, AFTER_GREATER
};
// Token kinds. Values start at 0x80000000; m_nCurToken is also compared
// against plain characters such as ',' in the constructor, presumably so that
// a special character can serve as its own token code -- TODO confirm.
73 enum TokenType
{ TOKEN_QUOTED
= 0x80000000, TOKEN_DOMAIN
, TOKEN_COMMENT
,
// Current read position and end of the input buffer.
76 sal_Unicode
const * m_pInputPos
;
77 sal_Unicode
const * m_pInputEnd
;
// Code of the token last produced by readToken().
78 sal_uInt32 m_nCurToken
;
// Extent of the current token, and of its content (readToken() sets the
// content bounds separately, e.g. excluding the closing '"').
79 sal_Unicode
const * m_pCurTokenBegin
;
80 sal_Unicode
const * m_pCurTokenEnd
;
81 sal_Unicode
const * m_pCurTokenContentBegin
;
82 sal_Unicode
const * m_pCurTokenContentEnd
;
// Set by readToken() for some inputs; the constructor then marks the real
// name / first comment as needing reparse instead of a verbatim copy.
83 bool m_bCurTokenReparse
;
// Two candidate addr-specs: one collected outside '<...>' and one inside.
// m_pAddrSpec points at whichever is currently being filled or was selected.
84 ParsedAddrSpec m_aOuterAddrSpec
;
85 ParsedAddrSpec m_aInnerAddrSpec
;
86 ParsedAddrSpec
* m_pAddrSpec
;
// Extent of the real name and of its content within the input buffer.
87 sal_Unicode
const * m_pRealNameBegin
;
88 sal_Unicode
const * m_pRealNameEnd
;
89 sal_Unicode
const * m_pRealNameContentBegin
;
90 sal_Unicode
const * m_pRealNameContentEnd
;
// When true the real name is produced via reparse() rather than copied.
91 bool m_bRealNameReparse
;
// When true no further tokens are appended to the real name.
92 bool m_bRealNameFinished
;
// Extent of the first comment seen; used as a fallback real name.
93 sal_Unicode
const * m_pFirstCommentBegin
;
94 sal_Unicode
const * m_pFirstCommentEnd
;
// When true the first comment is produced via reparseComment().
95 bool m_bFirstCommentReparse
;
99 inline void resetRealNameAndFirstComment();
103 inline void addTokenToAddrSpec(ElementType eTokenElem
);
105 inline void addTokenToRealName();
// Rebuilds the text in [pBegin, pEnd) into a new string; bAddrSpec selects
// addr-spec handling (true) or real-name handling (false).
109 static OUString
reparse(sal_Unicode
const * pBegin
,
110 sal_Unicode
const * pEnd
, bool bAddrSpec
);
// Rebuilds the comment text in [pBegin, pEnd) into a new string.
112 static OUString
reparseComment(sal_Unicode
const * pBegin
,
113 sal_Unicode
const * pEnd
);
// Parses rIn and stores the resulting entries in *pParser.
116 SvAddressParser_Impl(SvAddressParser
* pParser
, const OUString
& rIn
);
// Clears the real-name and first-comment bookkeeping: the range pointers are
// set to null and the reparse/finished flags to false.
119 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
121 m_pRealNameBegin
= 0;
123 m_pRealNameContentBegin
= 0;
124 m_pRealNameContentEnd
= 0;
125 m_bRealNameReparse
= false;
126 m_bRealNameFinished
= false;
127 m_pFirstCommentBegin
= 0;
128 m_pFirstCommentEnd
= 0;
129 m_bFirstCommentReparse
= false;
// Puts the parser into its start state for a new address: both addr-specs are
// reset, the outer one becomes current, real name and first comment are
// cleared, the state is BEFORE_COLON and the next token is read as an atom.
132 inline void SvAddressParser_Impl::reset()
134 m_aOuterAddrSpec
.reset();
135 m_aInnerAddrSpec
.reset();
136 m_pAddrSpec
= &m_aOuterAddrSpec
;
137 resetRealNameAndFirstComment();
138 m_eState
= BEFORE_COLON
;
139 m_eType
= TOKEN_ATOM
;
// Appends the current token to the current addr-spec.
//
// eTokenElem is recorded as the addr-spec's last element kind.
142 inline void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem
)
// First token: it defines where the addr-spec begins.
144 if (!m_pAddrSpec
->m_pBegin
)
145 m_pAddrSpec
->m_pBegin
= m_pCurTokenBegin
;
// Otherwise, a gap between the previous end and this token means the
// addr-spec cannot be copied verbatim and must be reparsed later.
146 else if (m_pAddrSpec
->m_pEnd
< m_pCurTokenBegin
)
147 m_pAddrSpec
->m_bReparse
= true;
148 m_pAddrSpec
->m_pEnd
= m_pCurTokenEnd
;
149 m_pAddrSpec
->m_eLastElem
= eTokenElem
;
// Appends the current token to the real name, unless the real name is already
// finished or the parser is inside '<...>' (state AFTER_LESS).
152 inline void SvAddressParser_Impl::addTokenToRealName()
154 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
// First token: it starts both the real name and its content.
156 if (!m_pRealNameBegin
)
157 m_pRealNameBegin
= m_pRealNameContentBegin
= m_pCurTokenBegin
;
// A verbatim copy is only possible when the token follows the previous one
// directly or after exactly one ' '. A wider gap, or a single separating
// character other than ' ', forces a reparse.
158 else if (m_pRealNameEnd
< m_pCurTokenBegin
- 1
159 || (m_pRealNameEnd
== m_pCurTokenBegin
- 1
160 && *m_pRealNameEnd
!= ' '))
161 m_bRealNameReparse
= true;
162 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenEnd
;
166 //============================================================================
168 // SvAddressParser_Impl
170 //============================================================================
// Reads the next token from [m_pInputPos, m_pInputEnd) and records its extent
// in m_pCurTokenBegin/End and, where applicable, m_pCurTokenContentBegin/End.
// m_nCurToken starts out as the expected token type m_eType.
// NOTE(review): the meaning of the bool result is not visible here --
// presumably false when the input is exhausted; confirm.
172 bool SvAddressParser_Impl::readToken()
174 m_nCurToken
= m_eType
;
175 m_bCurTokenReparse
= false;
// Token whose content begins at the current position (the token itself one
// character earlier). A '"' ends it, with the content ending just before the
// '"'. Presumably the quoted-string case -- TODO confirm.
180 m_pCurTokenBegin
= m_pInputPos
- 1;
181 m_pCurTokenContentBegin
= m_pInputPos
;
182 bool bEscaped
= false;
185 if (m_pInputPos
>= m_pInputEnd
)
187 sal_Unicode cChar
= *m_pInputPos
++;
190 m_bCurTokenReparse
= true;
193 else if (cChar
== '"')
195 m_pCurTokenEnd
= m_pInputPos
;
196 m_pCurTokenContentEnd
= m_pInputPos
- 1;
199 else if (cChar
== '\\')
// Same shape as above, but the token is ended by ']'. Presumably the
// domain-literal case -- TODO confirm.
206 m_pCurTokenBegin
= m_pInputPos
- 1;
207 m_pCurTokenContentBegin
= m_pInputPos
;
208 bool bEscaped
= false;
211 if (m_pInputPos
>= m_pInputEnd
)
213 sal_Unicode cChar
= *m_pInputPos
++;
216 else if (cChar
== ']')
218 m_pCurTokenEnd
= m_pInputPos
;
221 else if (cChar
== '\\')
// Here the content bounds start out null and are filled in lazily: the
// content begins at the first '(' , '\\' or printable character (> ' ' and
// not DEL) and its end is advanced as such characters are consumed.
// Presumably the comment case -- TODO confirm.
228 m_pCurTokenBegin
= m_pInputPos
- 1;
229 m_pCurTokenContentBegin
= 0;
230 m_pCurTokenContentEnd
= 0;
231 bool bEscaped
= false;
235 if (m_pInputPos
>= m_pInputEnd
)
237 sal_Unicode cChar
= *m_pInputPos
++;
240 m_bCurTokenReparse
= true;
241 m_pCurTokenContentEnd
= m_pInputPos
;
244 else if (cChar
== '(')
246 if (!m_pCurTokenContentBegin
)
247 m_pCurTokenContentBegin
= m_pInputPos
- 1;
248 m_pCurTokenContentEnd
= m_pInputPos
;
251 else if (cChar
== ')')
254 m_pCurTokenContentEnd
= m_pInputPos
;
259 else if (cChar
== '\\')
261 if (!m_pCurTokenContentBegin
)
262 m_pCurTokenContentBegin
= m_pInputPos
- 1;
265 else if (cChar
> ' ' && cChar
!= 0x7F) // DEL
267 if (!m_pCurTokenContentBegin
)
268 m_pCurTokenContentBegin
= m_pInputPos
- 1;
269 m_pCurTokenContentEnd
= m_pInputPos
;
// Look for the first character that is > ' ' and not DEL; the token begins
// there. For one of the listed special characters the token end is set right
// after it. Otherwise the token is extended until the end of input or until
// whitespace, DEL or a special character, which is pushed back
// (--m_pInputPos).
279 if (m_pInputPos
>= m_pInputEnd
)
281 cChar
= *m_pInputPos
++;
282 if (cChar
> ' ' && cChar
!= 0x7F) // DEL
285 m_pCurTokenBegin
= m_pInputPos
- 1;
286 if (cChar
== '"' || cChar
== '(' || cChar
== ')' || cChar
== ','
287 || cChar
== '.' || cChar
== ':' || cChar
== ';'
288 || cChar
== '<' || cChar
== '>' || cChar
== '@'
289 || cChar
== '[' || cChar
== '\\' || cChar
== ']')
292 m_pCurTokenEnd
= m_pInputPos
;
298 if (m_pInputPos
>= m_pInputEnd
)
300 m_pCurTokenEnd
= m_pInputPos
;
303 cChar
= *m_pInputPos
++;
304 if (cChar
<= ' ' || cChar
== '"' || cChar
== '('
305 || cChar
== ')' || cChar
== ',' || cChar
== '.'
306 || cChar
== ':' || cChar
== ';' || cChar
== '<'
307 || cChar
== '>' || cChar
== '@' || cChar
== '['
308 || cChar
== '\\' || cChar
== ']'
309 || cChar
== 0x7F) // DEL
311 m_pCurTokenEnd
= --m_pInputPos
;
319 //============================================================================
// Rebuilds the text in [pBegin, pEnd) character by character into a new
// string and returns it.
//
// The scan tracks a token mode (starting as TOKEN_ATOM and switching to
// TOKEN_QUOTED or TOKEN_COMMENT), an escape flag, and whether the output
// currently ends with a space. bAddrSpec is consulted where whitespace/DEL or
// '(' is met in atom mode (`!bAddrSpec && !bEndsWithSpace`).
// NOTE(review): presumably a single separating space is emitted there only
// for non-addr-spec text -- confirm.
321 OUString
SvAddressParser_Impl::reparse(sal_Unicode
const * pBegin
,
322 sal_Unicode
const * pEnd
, bool bAddrSpec
)
324 OUStringBuffer aResult
;
325 TokenType eMode
= TOKEN_ATOM
;
326 bool bEscaped
= false;
327 bool bEndsWithSpace
= false;
329 while (pBegin
< pEnd
)
331 sal_Unicode cChar
= *pBegin
++;
337 aResult
.append(cChar
);
340 else if (cChar
== '"')
343 aResult
.append(cChar
);
346 else if (cChar
== '\\')
349 aResult
.append(cChar
);
353 aResult
.append(cChar
);
359 aResult
.append(cChar
);
362 else if (cChar
== ']')
364 aResult
.append(cChar
);
367 else if (cChar
== '\\')
370 aResult
.append(cChar
);
374 aResult
.append(cChar
);
380 else if (cChar
== '(')
382 else if (cChar
== ')')
387 else if (cChar
== '\\')
// Whitespace and DEL.
392 if (cChar
<= ' ' || cChar
== 0x7F) // DEL
394 if (!bAddrSpec
&& !bEndsWithSpace
)
397 bEndsWithSpace
= true;
// A '(' switches the scan to comment mode.
400 else if (cChar
== '(')
402 if (!bAddrSpec
&& !bEndsWithSpace
)
405 bEndsWithSpace
= true;
407 eMode
= TOKEN_COMMENT
;
411 bEndsWithSpace
= false;
415 aResult
.append(cChar
);
416 eMode
= TOKEN_QUOTED
;
// A '[' is copied to the output and switches to TOKEN_QUOTED mode, the same
// mode that is set at line 416 above.
418 else if (cChar
== '[')
420 aResult
.append(cChar
);
421 eMode
= TOKEN_QUOTED
;
424 aResult
.append(cChar
);
429 return aResult
.makeStringAndClear();
432 //============================================================================
// Rebuilds the comment text in [pBegin, pEnd) into a new string by walking
// the range one character at a time and appending to an OUStringBuffer.
// NOTE(review): which characters are skipped is not visible here; presumably
// escaping backslashes are dropped -- confirm.
434 OUString
SvAddressParser_Impl::reparseComment(sal_Unicode
const * pBegin
,
435 sal_Unicode
const * pEnd
)
437 OUStringBuffer aResult
;
438 while (pBegin
< pEnd
)
440 sal_Unicode cChar
= *pBegin
++;
443 aResult
.append(cChar
);
445 return aResult
.makeStringAndClear();
448 //============================================================================
// Runs the whole parse: tokenizes rInput, tracks an outer and an inner
// ('<...>') addr-spec candidate plus real-name and first-comment ranges, and
// stores each completed address in *pParser (m_aFirst for the first one,
// m_aRest for the others).
449 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser
* pParser
,
450 const OUString
& rInput
)
// The input range is the string's own buffer; all range pointers kept by the
// parser point into it.
452 m_pInputPos
= rInput
.getStr();
453 m_pInputEnd
= m_pInputPos
+ rInput
.getLength();
461 m_bRealNameFinished
= true;
462 if (m_eState
== AFTER_LESS
)
473 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
475 if (m_pAddrSpec
->m_bAtFound
476 || m_pAddrSpec
->m_eLastElem
<= ELEMENT_DELIM
)
477 m_pAddrSpec
->reset();
478 addTokenToAddrSpec(ELEMENT_ITEM
);
480 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
482 if (m_bCurTokenReparse
)
484 if (!m_pRealNameBegin
)
485 m_pRealNameBegin
= m_pCurTokenBegin
;
486 m_pRealNameEnd
= m_pCurTokenEnd
;
487 m_bRealNameReparse
= true;
489 else if (m_bRealNameReparse
)
490 m_pRealNameEnd
= m_pCurTokenEnd
;
491 else if (!m_pRealNameBegin
)
493 m_pRealNameBegin
= m_pCurTokenBegin
;
494 m_pRealNameContentBegin
= m_pCurTokenContentBegin
;
495 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenContentEnd
;
499 m_pRealNameEnd
= m_pCurTokenEnd
;
500 m_bRealNameReparse
= true;
503 m_eType
= TOKEN_ATOM
;
507 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
509 if (m_pAddrSpec
->m_bAtFound
&& m_pAddrSpec
->m_eLastElem
== ELEMENT_DELIM
)
510 addTokenToAddrSpec(ELEMENT_ITEM
);
512 m_pAddrSpec
->reset();
514 addTokenToRealName();
515 m_eType
= TOKEN_ATOM
;
// Remember only the first non-empty comment seen before '<'; it may later
// serve as the real name.
519 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
520 && !m_pFirstCommentBegin
&& m_pCurTokenContentBegin
)
522 m_pFirstCommentBegin
= m_pCurTokenContentBegin
;
523 m_pFirstCommentEnd
= m_pCurTokenContentEnd
;
524 m_bFirstCommentReparse
= m_bCurTokenReparse
;
526 m_eType
= TOKEN_ATOM
;
530 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
532 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
533 m_pAddrSpec
->reset();
534 addTokenToAddrSpec(ELEMENT_ITEM
);
536 addTokenToRealName();
540 m_eType
= TOKEN_COMMENT
;
546 m_pAddrSpec
->finish();
547 addTokenToRealName();
// Switch to the inner addr-spec: the outer one is finished, and a real name
// collected so far is frozen.
555 m_aOuterAddrSpec
.finish();
556 if (m_pRealNameBegin
)
557 m_bRealNameFinished
= true;
558 m_pAddrSpec
= &m_aInnerAddrSpec
;
559 m_eState
= AFTER_LESS
;
563 m_aInnerAddrSpec
.finish();
567 m_aOuterAddrSpec
.finish();
568 addTokenToRealName();
// Leaving the inner addr-spec: a valid inner one also marks the outer one as
// finished, and parsing continues on the outer addr-spec.
574 if (m_eState
== AFTER_LESS
)
576 m_aInnerAddrSpec
.finish();
577 if (m_aInnerAddrSpec
.isValid())
578 m_aOuterAddrSpec
.m_eLastElem
= ELEMENT_END
;
579 m_pAddrSpec
= &m_aOuterAddrSpec
;
580 m_eState
= AFTER_GREATER
;
584 m_aOuterAddrSpec
.finish();
585 addTokenToRealName();
// Only the first delimiter of this kind, directly after an item, is accepted
// and sets m_bAtFound; any other occurrence resets the addr-spec.
590 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
592 if (!m_pAddrSpec
->m_bAtFound
593 && m_pAddrSpec
->m_eLastElem
== ELEMENT_ITEM
)
595 addTokenToAddrSpec(ELEMENT_DELIM
);
596 m_pAddrSpec
->m_bAtFound
= true;
599 m_pAddrSpec
->reset();
601 addTokenToRealName();
606 if (m_eState
== AFTER_LESS
)
607 if (m_nCurToken
== ',')
609 if (m_aInnerAddrSpec
.m_eLastElem
!= ELEMENT_END
)
610 m_aInnerAddrSpec
.reset();
613 m_aInnerAddrSpec
.finish();
// Choose the addr-spec to report: the inner one if it is valid, or if it is
// at least poorly valid while the outer one is not valid; otherwise the outer
// one if it is at least poorly valid.
616 if(m_aInnerAddrSpec
.isValid() || (!m_aOuterAddrSpec
.isValid() && m_aInnerAddrSpec
.isPoorlyValid()))
618 m_pAddrSpec
= &m_aInnerAddrSpec
;
620 else if(m_aOuterAddrSpec
.isPoorlyValid())
622 m_pAddrSpec
= &m_aOuterAddrSpec
;
// Addr-spec text: reparsed when flagged, otherwise taken verbatim from rInput
// (the string itself when the range spans all of it, else a copy of the
// sub-range).
631 OUString aTheAddrSpec
;
632 if (m_pAddrSpec
->m_bReparse
)
633 aTheAddrSpec
= reparse(m_pAddrSpec
->m_pBegin
, m_pAddrSpec
->m_pEnd
, true);
636 sal_Int32 nLen
= ( m_pAddrSpec
->m_pEnd
- m_pAddrSpec
->m_pBegin
);
637 if (nLen
== rInput
.getLength())
638 aTheAddrSpec
= rInput
;
640 aTheAddrSpec
= rInput
.copy( (m_pAddrSpec
->m_pBegin
- rInput
.getStr()),
// Real-name text. When there is no real name, or the real name is just the
// outer addr-spec and a comment exists, fall back to the first comment, or to
// the addr-spec itself if there is no comment. Otherwise use the real name,
// reparsed when flagged and copied verbatim from rInput when not.
643 OUString aTheRealName
;
644 if (!m_pRealNameBegin
||
645 (m_pAddrSpec
== &m_aOuterAddrSpec
&&
646 m_pRealNameBegin
== m_aOuterAddrSpec
.m_pBegin
&&
647 m_pRealNameEnd
== m_aOuterAddrSpec
.m_pEnd
&&
648 m_pFirstCommentBegin
))
650 if (!m_pFirstCommentBegin
)
651 aTheRealName
= aTheAddrSpec
;
652 else if (m_bFirstCommentReparse
)
653 aTheRealName
= reparseComment(m_pFirstCommentBegin
,
656 aTheRealName
= rInput
.copy( (m_pFirstCommentBegin
- rInput
.getStr()),
657 (m_pFirstCommentEnd
- m_pFirstCommentBegin
));
659 else if (m_bRealNameReparse
)
660 aTheRealName
= reparse(m_pRealNameBegin
, m_pRealNameEnd
, false);
663 sal_Int32 nLen
= (m_pRealNameContentEnd
- m_pRealNameContentBegin
);
664 if (nLen
== rInput
.getLength())
665 aTheRealName
= rInput
;
667 aTheRealName
= rInput
.copy( (m_pRealNameContentBegin
- rInput
.getStr()), nLen
);
// Store the result. Once a first entry exists, further entries are allocated
// with new and appended to m_aRest (released in ~SvAddressParser). The lines
// from 674 on fill m_aFirst and set m_bHasFirst, presumably in the else
// branch for the first entry -- confirm.
669 if (pParser
->m_bHasFirst
)
670 pParser
->m_aRest
.push_back(new SvAddressEntry_Impl( aTheAddrSpec
,
674 pParser
->m_bHasFirst
= true;
675 pParser
->m_aFirst
.m_aAddrSpec
= aTheAddrSpec
;
676 pParser
->m_aFirst
.m_aRealName
= aTheRealName
;
689 m_aOuterAddrSpec
.reset();
690 resetRealNameAndFirstComment();
691 m_eState
= BEFORE_LESS
;
696 m_aOuterAddrSpec
.finish();
697 addTokenToRealName();
701 m_aInnerAddrSpec
.reset();
707 m_eType
= TOKEN_QUOTED
;
// A delimiter is only accepted when the previous element was not itself a
// delimiter; otherwise the addr-spec is reset.
711 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
713 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
714 addTokenToAddrSpec(ELEMENT_DELIM
);
716 m_pAddrSpec
->reset();
718 addTokenToRealName();
722 m_eType
= TOKEN_DOMAIN
;
728 //============================================================================
732 //============================================================================
// Parses rInput by constructing a local SvAddressParser_Impl, whose
// constructor stores the parsed entries in this object.
734 SvAddressParser::SvAddressParser(const OUString
& rInput
)
737 SvAddressParser_Impl
aDoParse(this, rInput
);
740 //============================================================================
// Releases the entries held in m_aRest, which SvAddressParser_Impl allocated
// with new. The loop walks from the last element down to the first.
741 SvAddressParser::~SvAddressParser()
743 for ( size_t i
= m_aRest
.size(); i
> 0; )
744 delete m_aRest
[ --i
];
748 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */