1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <tools/inetmime.hxx>
21 #include <rtl/ustrbuf.hxx>
22 #include <svl/adrparse.hxx>
// Kind of the element most recently recorded for an addr-spec candidate
// (stored in ParsedAddrSpec::m_eLastElem).
// The enumerator order is significant: isPoorlyValid() tests
// m_eLastElem >= ELEMENT_ITEM and the parser tests m_eLastElem <= ELEMENT_DELIM.
27 enum ElementType
// Assigned by ParsedAddrSpec::reset().
{ ELEMENT_START
// Passed to addTokenToAddrSpec() for delimiter tokens.
, ELEMENT_DELIM
// Passed to addTokenToAddrSpec() for item tokens.
, ELEMENT_ITEM
// Assigned by ParsedAddrSpec::finish().
, ELEMENT_END
};
// Data members of ParsedAddrSpec.
// NOTE(review): the struct header and further members used elsewhere in the
// file (m_bAtFound, m_bReparse) are not visible in this view.

// Start of the candidate inside the input buffer; a null value is what
// addTokenToAddrSpec() tests to see that no token was recorded yet.
31 sal_Unicode
const * m_pBegin
;
// End of the candidate; the constructor of SvAddressParser_Impl computes the
// length as m_pEnd - m_pBegin.
32 sal_Unicode
const * m_pEnd
;
// Kind of the element that was recorded last (see ElementType).
33 ElementType m_eLastElem
;
// Default constructor: all initialisation is delegated to reset().
37 ParsedAddrSpec() { reset(); }
// Returns true when the last recorded element is ELEMENT_ITEM or
// ELEMENT_END (the two enumerators not below ELEMENT_ITEM).
39 bool isPoorlyValid() const { return m_eLastElem
>= ELEMENT_ITEM
; }
// Returns true when isPoorlyValid() holds and m_bAtFound is set as well.
// The parser sets m_bAtFound right after adding an ELEMENT_DELIM token.
41 bool isValid() const { return isPoorlyValid() && m_bAtFound
; }
// Returns the addr-spec to its initial state.
// NOTE(review): only the m_eLastElem assignment is visible in this view; the
// other statements of the body (original lines 49-51 and 53) are missing.
48 inline void ParsedAddrSpec::reset()
// Mark that no element has been recorded yet.
52 m_eLastElem
= ELEMENT_START
;
// Closes the addr-spec: afterwards m_eLastElem is ELEMENT_END, which the
// parser checks before adding further tokens.
// NOTE(review): original lines 58-59 are not visible in this view, so any
// condition guarding the assignment cannot be confirmed from here.
57 inline void ParsedAddrSpec::finish()
60 m_eLastElem
= ELEMENT_END
;
// Implementation helper that tokenises an address list and fills the
// SvAddressParser handed to its constructor.
// NOTE(review): access specifiers, the closing of the class and some member
// declarations used elsewhere in the file (m_eState, m_eType, reset(),
// readToken()) are not visible in this view.
67 class SvAddressParser_Impl
// Position of the parser relative to a colon and to the angle brackets.
// AFTER_LESS is the state in which m_pAddrSpec points at m_aInnerAddrSpec.
69 enum State
{ BEFORE_COLON
, BEFORE_LESS
, AFTER_LESS
, AFTER_GREATER
};
// Multi-character token kinds. They share m_nCurToken with plain character
// values (m_nCurToken is compared against a comma in the constructor).
// NOTE(review): the 0x80000000 base presumably keeps them apart from
// character codes - confirm. The end of this enum is not visible here.
71 enum TokenType
{ TOKEN_QUOTED
= 0x80000000, TOKEN_DOMAIN
, TOKEN_COMMENT
,
// Read cursor into the input buffer and its end.
74 sal_Unicode
const * m_pInputPos
;
75 sal_Unicode
const * m_pInputEnd
;
// Kind of the current token: initialised from m_eType by readToken().
76 sal_uInt32 m_nCurToken
;
// Extent of the current token including its delimiters.
77 sal_Unicode
const * m_pCurTokenBegin
;
78 sal_Unicode
const * m_pCurTokenEnd
;
// Extent of the content of the current token (set by readToken()).
79 sal_Unicode
const * m_pCurTokenContentBegin
;
80 sal_Unicode
const * m_pCurTokenContentEnd
;
// Set by readToken() when an escaped character was seen in the token.
81 bool m_bCurTokenReparse
;
// Two addr-spec candidates; m_pAddrSpec selects the one currently filled.
82 ParsedAddrSpec m_aOuterAddrSpec
;
83 ParsedAddrSpec m_aInnerAddrSpec
;
84 ParsedAddrSpec
* m_pAddrSpec
;
// Extent of the real name and of its content inside the input buffer.
85 sal_Unicode
const * m_pRealNameBegin
;
86 sal_Unicode
const * m_pRealNameEnd
;
87 sal_Unicode
const * m_pRealNameContentBegin
;
88 sal_Unicode
const * m_pRealNameContentEnd
;
// When set, the real name is produced by reparse() instead of a plain copy.
89 bool m_bRealNameReparse
;
// When set, addTokenToRealName() no longer extends the real name.
90 bool m_bRealNameFinished
;
// Extent of the first comment, used as real name in some cases.
91 sal_Unicode
const * m_pFirstCommentBegin
;
92 sal_Unicode
const * m_pFirstCommentEnd
;
// When set, the first comment is produced by reparseComment().
93 bool m_bFirstCommentReparse
;
// Clears the real-name and first-comment bookkeeping.
97 inline void resetRealNameAndFirstComment();
// Appends the current token to *m_pAddrSpec as an element of kind eTokenElem.
101 inline void addTokenToAddrSpec(ElementType eTokenElem
);
// Extends the real name by the current token where that is still allowed.
103 inline void addTokenToRealName();
// Builds a string from [pBegin, pEnd); bAddrSpec selects addr-spec rules.
107 static OUString
reparse(sal_Unicode
const * pBegin
,
108 sal_Unicode
const * pEnd
, bool bAddrSpec
);
// Builds a string from the comment text in [pBegin, pEnd).
110 static OUString
reparseComment(sal_Unicode
const * pBegin
,
111 sal_Unicode
const * pEnd
);
// Parses rIn and stores the resulting entries in *pParser.
114 SvAddressParser_Impl(SvAddressParser
* pParser
, const OUString
& rIn
);
// Clears the real-name and first-comment bookkeeping: pointers become null
// and the flags become false.
// NOTE(review): original line 120 is not visible in this view (presumably the
// matching assignment for m_pRealNameEnd - confirm).
117 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
119 m_pRealNameBegin
= 0;
121 m_pRealNameContentBegin
= 0;
122 m_pRealNameContentEnd
= 0;
123 m_bRealNameReparse
= false;
124 m_bRealNameFinished
= false;
125 m_pFirstCommentBegin
= 0;
126 m_pFirstCommentEnd
= 0;
127 m_bFirstCommentReparse
= false;
// Puts the whole parser into its start state for the next address.
130 inline void SvAddressParser_Impl::reset()
// Both addr-spec candidates start empty; the outer one is filled first.
132 m_aOuterAddrSpec
.reset();
133 m_aInnerAddrSpec
.reset();
134 m_pAddrSpec
= &m_aOuterAddrSpec
;
135 resetRealNameAndFirstComment();
// Initial state and the token kind that readToken() is asked to scan next.
136 m_eState
= BEFORE_COLON
;
137 m_eType
= TOKEN_ATOM
;
// Appends the current token to the addr-spec selected by m_pAddrSpec and
// records eTokenElem as the kind of the last element.
140 inline void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem
)
// First token of the candidate: remember where it starts.
142 if (!m_pAddrSpec
->m_pBegin
)
143 m_pAddrSpec
->m_pBegin
= m_pCurTokenBegin
;
// Otherwise, a gap between the previous end and this token means the
// candidate is not one contiguous run of input; flag it so the constructor
// builds the result with reparse() instead of copying the range.
144 else if (m_pAddrSpec
->m_pEnd
< m_pCurTokenBegin
)
145 m_pAddrSpec
->m_bReparse
= true;
// The candidate now extends to the end of the current token.
146 m_pAddrSpec
->m_pEnd
= m_pCurTokenEnd
;
147 m_pAddrSpec
->m_eLastElem
= eTokenElem
;
// Extends the real name by the current token, unless the real name is
// already finished or the parser is in state AFTER_LESS.
// NOTE(review): brace lines are not visible in this view, so the exact
// nesting of the statements below cannot be confirmed from here.
150 inline void SvAddressParser_Impl::addTokenToRealName()
152 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
// First token of the real name: range and content range start here.
154 if (!m_pRealNameBegin
)
155 m_pRealNameBegin
= m_pRealNameContentBegin
= m_pCurTokenBegin
;
// Otherwise request a reparse when more than one character lies between the
// previous end and this token, or when exactly one character lies between
// them and it is not a space.
156 else if (m_pRealNameEnd
< m_pCurTokenBegin
- 1
157 || (m_pRealNameEnd
== m_pCurTokenBegin
- 1
158 && *m_pRealNameEnd
!= ' '))
159 m_bRealNameReparse
= true;
// Range and content range now end at the end of the current token.
160 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenEnd
;
165 // SvAddressParser_Impl
// Reads the next token from [m_pInputPos, m_pInputEnd) and records its
// extent in the m_pCurToken* members.
// NOTE(review): the switch/case labels, loop headers, return statements and
// braces are not visible in this view. The section comments below describe
// only the visible statements; the mapping of sections to token kinds is
// inferred from the terminating character each section tests for.
168 bool SvAddressParser_Impl::readToken()
// By default the token kind is the kind requested through m_eType.
170 m_nCurToken
= m_eType
;
171 m_bCurTokenReparse
= false;
// --- Section terminated by a double quote (presumably TOKEN_QUOTED) ---
// The token starts at the character before the cursor, the content at the
// cursor itself.
176 m_pCurTokenBegin
= m_pInputPos
- 1;
177 m_pCurTokenContentBegin
= m_pInputPos
;
178 bool bEscaped
= false;
// End-of-input check; the action taken is not visible here.
181 if (m_pInputPos
>= m_pInputEnd
)
183 sal_Unicode cChar
= *m_pInputPos
++;
// Flags the token for reparsing (presumably in the escaped-character branch).
186 m_bCurTokenReparse
= true;
// Closing double quote: the token ends after it, the content before it.
189 else if (cChar
== '"')
191 m_pCurTokenEnd
= m_pInputPos
;
192 m_pCurTokenContentEnd
= m_pInputPos
- 1;
// Backslash: presumably starts an escape - the branch body is not visible.
195 else if (cChar
== '\\')
// --- Section terminated by a closing square bracket (presumably
// TOKEN_DOMAIN) ---
202 m_pCurTokenBegin
= m_pInputPos
- 1;
203 m_pCurTokenContentBegin
= m_pInputPos
;
204 bool bEscaped
= false;
207 if (m_pInputPos
>= m_pInputEnd
)
209 sal_Unicode cChar
= *m_pInputPos
++;
// Closing bracket: the token ends after it. No content end is set in the
// visible lines of this section.
212 else if (cChar
== ']')
214 m_pCurTokenEnd
= m_pInputPos
;
217 else if (cChar
== '\\')
// --- Section handling parentheses (presumably TOKEN_COMMENT) ---
// The content range starts out empty and is grown as content is found.
224 m_pCurTokenBegin
= m_pInputPos
- 1;
225 m_pCurTokenContentBegin
= 0;
226 m_pCurTokenContentEnd
= 0;
227 bool bEscaped
= false;
231 if (m_pInputPos
>= m_pInputEnd
)
233 sal_Unicode cChar
= *m_pInputPos
++;
// Flags the token for reparsing and extends the content up to the cursor
// (presumably in the escaped-character branch).
236 m_bCurTokenReparse
= true;
237 m_pCurTokenContentEnd
= m_pInputPos
;
// Opening parenthesis inside the comment counts as content.
240 else if (cChar
== '(')
242 if (!m_pCurTokenContentBegin
)
243 m_pCurTokenContentBegin
= m_pInputPos
- 1;
244 m_pCurTokenContentEnd
= m_pInputPos
;
// Closing parenthesis: the content end is moved up to the cursor.
// NOTE(review): the condition guarding this (original lines 248-249) is not
// visible here.
247 else if (cChar
== ')')
250 m_pCurTokenContentEnd
= m_pInputPos
;
// Backslash: may start the content, but the content end is not moved in the
// visible lines.
255 else if (cChar
== '\\')
257 if (!m_pCurTokenContentBegin
)
258 m_pCurTokenContentBegin
= m_pInputPos
- 1;
// Any character above space other than DEL is content.
261 else if (cChar
> ' ' && cChar
!= 0x7F) // DEL
263 if (!m_pCurTokenContentBegin
)
264 m_pCurTokenContentBegin
= m_pInputPos
- 1;
265 m_pCurTokenContentEnd
= m_pInputPos
;
// --- Remaining section (presumably the default case) ---
// Looks for a character above space that is not DEL (presumably skipping
// leading blanks and control characters).
275 if (m_pInputPos
>= m_pInputEnd
)
277 cChar
= *m_pInputPos
++;
278 if (cChar
> ' ' && cChar
!= 0x7F) // DEL
// The token starts at the character just read.
281 m_pCurTokenBegin
= m_pInputPos
- 1;
// One of the listed special characters: the token ends right after it.
282 if (cChar
== '"' || cChar
== '(' || cChar
== ')' || cChar
== ','
283 || cChar
== '.' || cChar
== ':' || cChar
== ';'
284 || cChar
== '<' || cChar
== '>' || cChar
== '@'
285 || cChar
== '[' || cChar
== '\\' || cChar
== ']')
288 m_pCurTokenEnd
= m_pInputPos
;
// Otherwise the token extends to the end of the input ...
294 if (m_pInputPos
>= m_pInputEnd
)
296 m_pCurTokenEnd
= m_pInputPos
;
// ... or up to the next blank, control, special or DEL character, which is
// pushed back by decrementing the cursor.
299 cChar
= *m_pInputPos
++;
300 if (cChar
<= ' ' || cChar
== '"' || cChar
== '('
301 || cChar
== ')' || cChar
== ',' || cChar
== '.'
302 || cChar
== ':' || cChar
== ';' || cChar
== '<'
303 || cChar
== '>' || cChar
== '@' || cChar
== '['
304 || cChar
== '\\' || cChar
== ']'
305 || cChar
== 0x7F) // DEL
307 m_pCurTokenEnd
= --m_pInputPos
;
// Builds a string from the characters in [pBegin, pEnd), tracking a scanning
// mode (eMode) and an escape flag. bAddrSpec selects addr-spec rules: in the
// visible code it suppresses the bEndsWithSpace handling.
// NOTE(review): the switch over eMode, its case labels, several conditions
// and all braces are not visible in this view; only visible statements are
// described below.
316 OUString
SvAddressParser_Impl::reparse(sal_Unicode
const * pBegin
,
317 sal_Unicode
const * pEnd
, bool bAddrSpec
)
319 OUStringBuffer aResult
;
// Scanning starts in atom mode, unescaped, with no pending trailing space.
320 TokenType eMode
= TOKEN_ATOM
;
321 bool bEscaped
= false;
322 bool bEndsWithSpace
= false;
324 while (pBegin
< pEnd
)
326 sal_Unicode cChar
= *pBegin
++;
// --- Section testing for a double quote (presumably mode TOKEN_QUOTED) ---
// NOTE(review): the conditions guarding the individual appends are missing.
332 aResult
.append(cChar
);
335 else if (cChar
== '"')
338 aResult
.append(cChar
);
341 else if (cChar
== '\\')
344 aResult
.append(cChar
);
348 aResult
.append(cChar
);
// --- Section testing for a closing square bracket (presumably mode
// TOKEN_DOMAIN) ---
354 aResult
.append(cChar
);
357 else if (cChar
== ']')
359 aResult
.append(cChar
);
362 else if (cChar
== '\\')
365 aResult
.append(cChar
);
369 aResult
.append(cChar
);
// --- Section testing for parentheses (presumably mode TOKEN_COMMENT) ---
// No append is visible in this section.
375 else if (cChar
== '(')
377 else if (cChar
== ')')
382 else if (cChar
== '\\')
// --- Section with the bEndsWithSpace handling (presumably mode TOKEN_ATOM) ---
// Blank, control or DEL character outside an addr-spec: bEndsWithSpace is
// set unless it is already set.
// NOTE(review): the statement preceding the assignment (original lines
// 390-391) is not visible here.
387 if (cChar
<= ' ' || cChar
== 0x7F) // DEL
389 if (!bAddrSpec
&& !bEndsWithSpace
)
392 bEndsWithSpace
= true;
// Opening parenthesis: same space handling, then switch to comment mode.
395 else if (cChar
== '(')
397 if (!bAddrSpec
&& !bEndsWithSpace
)
400 bEndsWithSpace
= true;
402 eMode
= TOKEN_COMMENT
;
// Any other character clears the trailing-space flag.
406 bEndsWithSpace
= false;
// Switch to quoted mode (the guarding condition is not visible here).
410 aResult
.append(cChar
);
411 eMode
= TOKEN_QUOTED
;
// NOTE(review): after an opening square bracket the mode is set to
// TOKEN_QUOTED, not TOKEN_DOMAIN, so the section that tests for the closing
// bracket would not be entered from here - confirm whether this is intended.
413 else if (cChar
== '[')
415 aResult
.append(cChar
);
416 eMode
= TOKEN_QUOTED
;
419 aResult
.append(cChar
);
// Hand out the accumulated text and empty the buffer.
424 return aResult
.makeStringAndClear();
// Builds a string from the characters in [pBegin, pEnd).
// NOTE(review): original lines 435-436 are not visible in this view; they
// presumably adjust cChar before it is appended (for example for an escape
// character) - confirm.
428 OUString
SvAddressParser_Impl::reparseComment(sal_Unicode
const * pBegin
,
429 sal_Unicode
const * pEnd
)
431 OUStringBuffer aResult
;
432 while (pBegin
< pEnd
)
434 sal_Unicode cChar
= *pBegin
++;
437 aResult
.append(cChar
);
// Hand out the accumulated text and empty the buffer.
439 return aResult
.makeStringAndClear();
// Parses rInput and stores every address found in *pParser: the first one in
// m_aFirst (setting m_bHasFirst), every later one appended to m_aRest.
// NOTE(review): the main loop, the switch over m_nCurToken with its case
// labels, several else lines and all braces are not visible in this view.
// The section comments below describe only the visible statements; which
// token each section belongs to is an inference and marked as such.
442 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser
* pParser
,
443 const OUString
& rInput
)
// The current-token pointers start out null.
444 : m_pCurTokenBegin(NULL
)
445 , m_pCurTokenEnd(NULL
)
446 , m_pCurTokenContentBegin(NULL
)
447 , m_pCurTokenContentEnd(NULL
)
// The input range is the character buffer of rInput.
449 m_pInputPos
= rInput
.getStr();
450 m_pInputEnd
= m_pInputPos
+ rInput
.getLength();
// Visible fragment: the real name is marked finished and the state is
// compared with AFTER_LESS.
// NOTE(review): presumably the handling of readToken() reporting the end of
// the input - confirm.
458 m_bRealNameFinished
= true;
459 if (m_eState
== AFTER_LESS
)
// --- Section using the token content range (presumably a quoted string) ---
// Unless the current addr-spec is finished: restart it when an at-sign was
// already found or nothing but START/DELIM was recorded, then add the token
// as an item.
470 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
472 if (m_pAddrSpec
->m_bAtFound
473 || m_pAddrSpec
->m_eLastElem
<= ELEMENT_DELIM
)
474 m_pAddrSpec
->reset();
475 addTokenToAddrSpec(ELEMENT_ITEM
);
// Real-name bookkeeping, only while the real name is open and the parser is
// not in state AFTER_LESS.
477 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
// Token needs reparsing: extend the raw range and request a reparse.
479 if (m_bCurTokenReparse
)
481 if (!m_pRealNameBegin
)
482 m_pRealNameBegin
= m_pCurTokenBegin
;
483 m_pRealNameEnd
= m_pCurTokenEnd
;
484 m_bRealNameReparse
= true;
// A reparse is already pending: only the raw range has to grow.
486 else if (m_bRealNameReparse
)
487 m_pRealNameEnd
= m_pCurTokenEnd
;
// First token of the real name: take over range and content range.
488 else if (!m_pRealNameBegin
)
490 m_pRealNameBegin
= m_pCurTokenBegin
;
491 m_pRealNameContentBegin
= m_pCurTokenContentBegin
;
492 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenContentEnd
;
// Remaining case (its else line is not visible): extend and request reparse.
496 m_pRealNameEnd
= m_pCurTokenEnd
;
497 m_bRealNameReparse
= true;
// The next token is scanned as an atom again.
500 m_eType
= TOKEN_ATOM
;
// --- Section presumably handling a domain literal ---
// Unless finished: the token is added as an item when it directly follows
// the delimiter after an at-sign was found.
// NOTE(review): original line 508 is not visible; whether the reset() below
// is the else branch of the inner condition cannot be told from this view.
504 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
506 if (m_pAddrSpec
->m_bAtFound
&& m_pAddrSpec
->m_eLastElem
== ELEMENT_DELIM
)
507 addTokenToAddrSpec(ELEMENT_ITEM
);
509 m_pAddrSpec
->reset();
511 addTokenToRealName();
512 m_eType
= TOKEN_ATOM
;
// --- Section recording the first comment ---
// Only the first comment with content is remembered, and only while the real
// name is open and the parser is not in state AFTER_LESS.
516 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
517 && !m_pFirstCommentBegin
&& m_pCurTokenContentBegin
)
519 m_pFirstCommentBegin
= m_pCurTokenContentBegin
;
520 m_pFirstCommentEnd
= m_pCurTokenContentEnd
;
521 m_bFirstCommentReparse
= m_bCurTokenReparse
;
523 m_eType
= TOKEN_ATOM
;
// --- Section presumably handling an atom ---
// Unless finished: restart the addr-spec when the atom does not follow a
// delimiter, then add it as an item.
527 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
529 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
530 m_pAddrSpec
->reset();
531 addTokenToAddrSpec(ELEMENT_ITEM
);
533 addTokenToRealName();
// The next token is scanned as a comment (presumably after an opening
// parenthesis - confirm).
537 m_eType
= TOKEN_COMMENT
;
// Visible fragment: the current addr-spec is finished and the token goes to
// the real name.
543 m_pAddrSpec
->finish();
544 addTokenToRealName();
// --- Section switching to the inner addr-spec (presumably on a less-than
// sign, depending on m_eState) ---
// Finish the outer addr-spec, close the real name if one was started, and
// continue with the inner addr-spec in state AFTER_LESS.
552 m_aOuterAddrSpec
.finish();
553 if (m_pRealNameBegin
)
554 m_bRealNameFinished
= true;
555 m_pAddrSpec
= &m_aInnerAddrSpec
;
556 m_eState
= AFTER_LESS
;
// Visible fragments of further branches of the same section.
560 m_aInnerAddrSpec
.finish();
564 m_aOuterAddrSpec
.finish();
565 addTokenToRealName();
// --- Section leaving the inner addr-spec (presumably on a greater-than
// sign) ---
// In state AFTER_LESS: finish the inner addr-spec; a valid inner one also
// marks the outer one as finished. Then continue with the outer addr-spec.
571 if (m_eState
== AFTER_LESS
)
573 m_aInnerAddrSpec
.finish();
574 if (m_aInnerAddrSpec
.isValid())
575 m_aOuterAddrSpec
.m_eLastElem
= ELEMENT_END
;
576 m_pAddrSpec
= &m_aOuterAddrSpec
;
577 m_eState
= AFTER_GREATER
;
// Visible fragment of the other branch (its else line is not visible).
581 m_aOuterAddrSpec
.finish();
582 addTokenToRealName();
// --- Section setting m_bAtFound (presumably on an at-sign) ---
// Unless finished: the delimiter is accepted only as the first one and only
// directly after an item.
// NOTE(review): original lines 594-595 are not visible; the reset() below is
// presumably the else branch - confirm.
587 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
589 if (!m_pAddrSpec
->m_bAtFound
590 && m_pAddrSpec
->m_eLastElem
== ELEMENT_ITEM
)
592 addTokenToAddrSpec(ELEMENT_DELIM
);
593 m_pAddrSpec
->m_bAtFound
= true;
596 m_pAddrSpec
->reset();
598 addTokenToRealName();
// --- Section for separators (m_nCurToken is compared with a comma) ---
// In state AFTER_LESS a comma resets an unfinished inner addr-spec; the
// finish() below presumably belongs to the other separator - confirm.
603 if (m_eState
== AFTER_LESS
)
604 if (m_nCurToken
== ',')
606 if (m_aInnerAddrSpec
.m_eLastElem
!= ELEMENT_END
)
607 m_aInnerAddrSpec
.reset();
610 m_aInnerAddrSpec
.finish();
// Choose the addr-spec to report: the inner one when it is valid, or when it
// is at least poorly valid and the outer one is not valid; otherwise the
// outer one when that is at least poorly valid.
613 if(m_aInnerAddrSpec
.isValid() || (!m_aOuterAddrSpec
.isValid() && m_aInnerAddrSpec
.isPoorlyValid()))
615 m_pAddrSpec
= &m_aInnerAddrSpec
;
617 else if(m_aOuterAddrSpec
.isPoorlyValid())
619 m_pAddrSpec
= &m_aOuterAddrSpec
;
// Build the addr-spec string: via reparse() when flagged, otherwise from
// rInput - the whole string when the range has the length of rInput, else a
// copy starting at the offset of m_pBegin.
// NOTE(review): the final argument of the copy() call (original line 638) is
// not visible in this view.
628 OUString aTheAddrSpec
;
629 if (m_pAddrSpec
->m_bReparse
)
630 aTheAddrSpec
= reparse(m_pAddrSpec
->m_pBegin
, m_pAddrSpec
->m_pEnd
, true);
633 sal_Int32 nLen
= ( m_pAddrSpec
->m_pEnd
- m_pAddrSpec
->m_pBegin
);
634 if (nLen
== rInput
.getLength())
635 aTheAddrSpec
= rInput
;
637 aTheAddrSpec
= rInput
.copy( (m_pAddrSpec
->m_pBegin
- rInput
.getStr()),
// Build the real-name string. The first branch applies when no real name
// was collected, or when the real name covers exactly the outer addr-spec
// that is being reported and a first comment exists.
640 OUString aTheRealName
;
641 if (!m_pRealNameBegin
||
642 (m_pAddrSpec
== &m_aOuterAddrSpec
&&
643 m_pRealNameBegin
== m_aOuterAddrSpec
.m_pBegin
&&
644 m_pRealNameEnd
== m_aOuterAddrSpec
.m_pEnd
&&
645 m_pFirstCommentBegin
))
// Without a first comment the addr-spec doubles as real name; otherwise the
// first comment is used, reparsed or copied from rInput.
// NOTE(review): the second argument of reparseComment() (original line 651)
// is not visible in this view.
647 if (!m_pFirstCommentBegin
)
648 aTheRealName
= aTheAddrSpec
;
649 else if (m_bFirstCommentReparse
)
650 aTheRealName
= reparseComment(m_pFirstCommentBegin
,
653 aTheRealName
= rInput
.copy( (m_pFirstCommentBegin
- rInput
.getStr()),
654 (m_pFirstCommentEnd
- m_pFirstCommentBegin
));
// A collected real name is reparsed when flagged ...
656 else if (m_bRealNameReparse
)
657 aTheRealName
= reparse(m_pRealNameBegin
, m_pRealNameEnd
, false);
// ... otherwise its content range is taken from rInput (the whole string
// when the lengths match).
660 sal_Int32 nLen
= (m_pRealNameContentEnd
- m_pRealNameContentBegin
);
661 if (nLen
== rInput
.getLength())
662 aTheRealName
= rInput
;
664 aTheRealName
= rInput
.copy( (m_pRealNameContentBegin
- rInput
.getStr()), nLen
);
// Store the result: later entries are heap-allocated and appended to
// m_aRest, the first one is written into m_aFirst.
// NOTE(review): the entries pushed here are raw owning pointers; the
// destructor of SvAddressParser deletes them. The remaining arguments of the
// SvAddressEntry_Impl constructor call are not visible in this view.
666 if (pParser
->m_bHasFirst
)
667 pParser
->m_aRest
.push_back(new SvAddressEntry_Impl( aTheAddrSpec
,
671 pParser
->m_bHasFirst
= true;
672 pParser
->m_aFirst
.m_aAddrSpec
= aTheAddrSpec
;
673 pParser
->m_aFirst
.m_aRealName
= aTheRealName
;
// --- Section entering state BEFORE_LESS (presumably on a colon) ---
// Everything collected so far is discarded.
686 m_aOuterAddrSpec
.reset();
687 resetRealNameAndFirstComment();
688 m_eState
= BEFORE_LESS
;
// Visible fragments of further branches of the same section.
693 m_aOuterAddrSpec
.finish();
694 addTokenToRealName();
698 m_aInnerAddrSpec
.reset();
// The next token is scanned as a quoted string (presumably after an opening
// double quote - confirm).
704 m_eType
= TOKEN_QUOTED
;
// --- Section adding a delimiter without setting m_bAtFound (presumably on a
// dot) ---
// Unless finished: a delimiter is added when the last element is not a
// delimiter already.
// NOTE(review): original line 712 is not visible; the reset() below is
// presumably the else branch - confirm.
708 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
710 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
711 addTokenToAddrSpec(ELEMENT_DELIM
);
713 m_pAddrSpec
->reset();
715 addTokenToRealName();
// The next token is scanned as a domain literal (presumably after an opening
// square bracket - confirm).
719 m_eType
= TOKEN_DOMAIN
;
// Parses rInput by constructing a temporary SvAddressParser_Impl that writes
// its results into this object.
// NOTE(review): the member initialiser list (original lines 726-727) is not
// visible in this view.
725 SvAddressParser::SvAddressParser(const OUString
& rInput
)
728 SvAddressParser_Impl
aDoParse(this, rInput
);
// Deletes every entry held in m_aRest (the entries are allocated with new in
// the constructor of SvAddressParser_Impl).
731 SvAddressParser::~SvAddressParser()
// Walks from the last index down to 0; the index is decremented before use,
// so an empty container is never indexed.
733 for ( size_t i
= m_aRest
.size(); i
> 0; )
734 delete m_aRest
[ --i
];
738 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */