1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ustrbuf.hxx>
21 #include <svl/adrparse.hxx>
// Kind of the element most recently added to an address spec (the value is
// stored in m_eLastElem). The enumerator order is significant: code in this
// file compares values with ">= ELEMENT_ITEM" and "<= ELEMENT_DELIM".
26 enum ElementType
{ ELEMENT_START
, ELEMENT_DELIM
, ELEMENT_ITEM
, ELEMENT_END
};
// Start of the address spec inside the input buffer. addTokenToAddrSpec()
// tests it for null and, if null, sets it to the begin of the current token.
30 sal_Unicode
const * m_pBegin
;
// End of the address spec; addTokenToAddrSpec() moves it to the end of the
// current token every time a token is added.
31 sal_Unicode
const * m_pEnd
;
// Kind of the last element that was added (see ElementType).
32 ElementType m_eLastElem
;
// Default constructor: puts the object into its initial state via reset().
36 ParsedAddrSpec() { reset(); }
// Returns true when the last element is ELEMENT_ITEM or a later enumerator
// (that is, ELEMENT_ITEM or ELEMENT_END).
38 bool isPoorlyValid() const { return m_eLastElem
>= ELEMENT_ITEM
; }
// Returns true when the spec is poorly valid and m_bAtFound is set.
// NOTE(review): the declaration of m_bAtFound is not part of this excerpt;
// presumably it records that an at-sign delimiter was seen -- confirm.
40 bool isValid() const { return isPoorlyValid() && m_bAtFound
; }
// Returns the spec to its initial state. The statement visible here sets
// m_eLastElem back to ELEMENT_START.
// NOTE(review): the braces and any further statements of this definition are
// missing from this excerpt (the embedded numbering jumps from 47 to 51) --
// confirm against the full file.
47 void ParsedAddrSpec::reset()
51 m_eLastElem
= ELEMENT_START
;
// Marks the spec as finished. The statement visible here sets m_eLastElem to
// ELEMENT_END.
// NOTE(review): the embedded numbering jumps from 56 to 59, so the assignment
// may be guarded by a condition that is not part of this excerpt -- confirm.
56 void ParsedAddrSpec::finish()
59 m_eLastElem
= ELEMENT_END
;
// Implementation helper that tokenizes an input string and extracts address
// specs from it; the work is driven from its constructor, which appends the
// results to the m_vAddresses member of the SvAddressParser passed in.
// NOTE(review): several lines of this class are missing from this excerpt
// (braces, access specifiers, and the declarations of m_eState, m_eType,
// reset() and readToken(), which are used further down) -- confirm against
// the full file.
66 class SvAddressParser_Impl
// Parser states; values of this enum are stored in m_eState.
68 enum State
{ BEFORE_COLON
, BEFORE_LESS
, AFTER_LESS
, AFTER_GREATER
};
// Token categories with underlying type sal_uInt32, numbered from
// 0x80000000. m_nCurToken is assigned such a value in readToken() and is
// also compared against a plain character code in the constructor.
// NOTE(review): the high start value presumably keeps the categories
// distinct from character codes -- confirm.
70 enum TokenType
: sal_uInt32
{
71 TOKEN_QUOTED
= 0x80000000, TOKEN_DOMAIN
, TOKEN_COMMENT
, TOKEN_ATOM
};
// Current read position in the input; advanced by readToken().
73 sal_Unicode
const * m_pInputPos
;
// One past the last input character (start pointer plus input length).
74 sal_Unicode
const * m_pInputEnd
;
// The current token: a TokenType value or a character code.
75 sal_uInt32 m_nCurToken
;
// Extent of the current token in the input.
76 sal_Unicode
const * m_pCurTokenBegin
;
77 sal_Unicode
const * m_pCurTokenEnd
;
// Extent of the content of the current token; readToken() sets these
// separately from the token extent.
78 sal_Unicode
const * m_pCurTokenContentBegin
;
79 sal_Unicode
const * m_pCurTokenContentEnd
;
// Cleared at the start of readToken() and set to true at some points
// while scanning.
80 bool m_bCurTokenReparse
;
// Two address-spec accumulators; m_pAddrSpec points at the one currently
// being filled (it is switched between them and may be set to nullptr in
// the constructor).
81 ParsedAddrSpec m_aOuterAddrSpec
;
82 ParsedAddrSpec m_aInnerAddrSpec
;
83 ParsedAddrSpec
* m_pAddrSpec
;
// Extent of the real name and of its content, plus a flag requesting that
// the range be run through reparse() and a flag that stops further tokens
// from being added (see addTokenToRealName()).
84 sal_Unicode
const * m_pRealNameBegin
;
85 sal_Unicode
const * m_pRealNameEnd
;
86 sal_Unicode
const * m_pRealNameContentBegin
;
87 sal_Unicode
const * m_pRealNameContentEnd
;
88 bool m_bRealNameReparse
;
89 bool m_bRealNameFinished
;
// Content extent of the first comment token seen, and whether that token
// had its reparse flag set.
90 sal_Unicode
const * m_pFirstCommentBegin
;
91 sal_Unicode
const * m_pFirstCommentEnd
;
92 bool m_bFirstCommentReparse
;
// Clears all real-name and first-comment members.
96 inline void resetRealNameAndFirstComment();
// Extends the current address spec by the current token, recording
// eTokenElem as the kind of the last element.
100 void addTokenToAddrSpec(ElementType eTokenElem
);
// Extends the real name by the current token (subject to state checks).
102 inline void addTokenToRealName();
// Builds a string from the character range [pBegin, pEnd); bAddrSpec
// changes how whitespace is handled (see the definition).
106 static OUString
reparse(sal_Unicode
const * pBegin
,
107 sal_Unicode
const * pEnd
, bool bAddrSpec
);
// Builds a string from the character range [pBegin, pEnd) of a comment.
109 static OUString
reparseComment(sal_Unicode
const * pBegin
,
110 sal_Unicode
const * pEnd
);
// Parses rIn; results are appended to pParser->m_vAddresses.
113 SvAddressParser_Impl(SvAddressParser
* pParser
, const OUString
& rIn
);
// Clears everything collected for the real name and the first comment: all
// six pointers are set to nullptr and all three flags to false.
// NOTE(review): the braces of this definition are missing from this excerpt.
116 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
118 m_pRealNameBegin
= nullptr;
119 m_pRealNameEnd
= nullptr;
120 m_pRealNameContentBegin
= nullptr;
121 m_pRealNameContentEnd
= nullptr;
122 m_bRealNameReparse
= false;
123 m_bRealNameFinished
= false;
124 m_pFirstCommentBegin
= nullptr;
125 m_pFirstCommentEnd
= nullptr;
126 m_bFirstCommentReparse
= false;
// Puts the parser into its start configuration: both address specs are
// reset, the outer one becomes the current spec, real name and first comment
// are cleared, the state becomes BEFORE_COLON and the expected token type
// becomes TOKEN_ATOM.
// NOTE(review): the braces of this definition are missing from this excerpt.
129 inline void SvAddressParser_Impl::reset()
131 m_aOuterAddrSpec
.reset();
132 m_aInnerAddrSpec
.reset();
133 m_pAddrSpec
= &m_aOuterAddrSpec
;
134 resetRealNameAndFirstComment();
135 m_eState
= BEFORE_COLON
;
136 m_eType
= TOKEN_ATOM
;
// Appends the current token to the address spec that m_pAddrSpec points to.
// eTokenElem is stored as the kind of the last element.
// NOTE(review): the braces of this definition are missing from this excerpt.
139 void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem
)
// First token of the spec: remember where the spec starts.
141 if (!m_pAddrSpec
->m_pBegin
)
142 m_pAddrSpec
->m_pBegin
= m_pCurTokenBegin
;
// Otherwise, if the previous end lies before the begin of this token there
// is a gap between the two, so the spec is flagged for reparsing instead of
// being copied verbatim.
143 else if (m_pAddrSpec
->m_pEnd
< m_pCurTokenBegin
)
144 m_pAddrSpec
->m_bReparse
= true;
// The spec now ends where this token ends.
145 m_pAddrSpec
->m_pEnd
= m_pCurTokenEnd
;
146 m_pAddrSpec
->m_eLastElem
= eTokenElem
;
// Appends the current token to the real name, unless the real name is
// already finished or the parser is in state AFTER_LESS.
// NOTE(review): the brace lines are missing from this excerpt; the statements
// after the guard are presumably all inside its block -- confirm.
149 inline void SvAddressParser_Impl::addTokenToRealName()
151 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
// First token: the real name and its content both start at this token.
153 if (!m_pRealNameBegin
)
154 m_pRealNameBegin
= m_pRealNameContentBegin
= m_pCurTokenBegin
;
// Otherwise request a reparse when more than one character lies between the
// previous end and this token, or when exactly one character lies between
// them and that character is not a space.
155 else if (m_pRealNameEnd
< m_pCurTokenBegin
- 1
156 || (m_pRealNameEnd
== m_pCurTokenBegin
- 1
157 && *m_pRealNameEnd
!= ' '))
158 m_bRealNameReparse
= true;
// The real name and its content now end where this token ends.
159 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenEnd
;
164 // SvAddressParser_Impl
// Reads the next token from the input, starting at m_pInputPos, and records
// its extent in the m_pCurToken* members. The expected token type (m_eType)
// is stored as the current token first and the reparse flag is cleared.
// NOTE(review): many lines of this definition are missing from this excerpt
// (braces, switch and case labels, loop headers, return statements, and the
// conditions in front of some statements). The grouping comments below are
// inferred from the visible comparisons -- confirm against the full file.
167 bool SvAddressParser_Impl::readToken()
169 m_nCurToken
= m_eType
;
170 m_bCurTokenReparse
= false;
// Group terminated by a double quote (presumably the TOKEN_QUOTED case): the
// token starts one character before the read position and its content
// starts at the read position.
175 m_pCurTokenBegin
= m_pInputPos
- 1;
176 m_pCurTokenContentBegin
= m_pInputPos
;
177 bool bEscaped
= false;
// End-of-input check before the next character is consumed.
180 if (m_pInputPos
>= m_pInputEnd
)
182 sal_Unicode cChar
= *m_pInputPos
++;
185 m_bCurTokenReparse
= true;
// Terminating double quote: the token ends after it, the content before it.
188 else if (cChar
== '"')
190 m_pCurTokenEnd
= m_pInputPos
;
191 m_pCurTokenContentEnd
= m_pInputPos
- 1;
194 else if (cChar
== '\\')
// Group terminated by a closing square bracket (presumably the TOKEN_DOMAIN
// case); token and content start are set up as in the previous group.
201 m_pCurTokenBegin
= m_pInputPos
- 1;
202 m_pCurTokenContentBegin
= m_pInputPos
;
203 bool bEscaped
= false;
206 if (m_pInputPos
>= m_pInputEnd
)
208 sal_Unicode cChar
= *m_pInputPos
++;
// Terminating bracket: the token ends after it.
211 else if (cChar
== ']')
213 m_pCurTokenEnd
= m_pInputPos
;
216 else if (cChar
== '\\')
// Group that reacts to parentheses (presumably the TOKEN_COMMENT case). The
// content pointers start out null; the content begin is set at the first
// character that is recorded as content.
223 m_pCurTokenBegin
= m_pInputPos
- 1;
224 m_pCurTokenContentBegin
= nullptr;
225 m_pCurTokenContentEnd
= nullptr;
226 bool bEscaped
= false;
230 if (m_pInputPos
>= m_pInputEnd
)
232 sal_Unicode cChar
= *m_pInputPos
++;
235 m_bCurTokenReparse
= true;
236 m_pCurTokenContentEnd
= m_pInputPos
;
// Opening parenthesis: counts as content.
239 else if (cChar
== '(')
241 if (!m_pCurTokenContentBegin
)
242 m_pCurTokenContentBegin
= m_pInputPos
- 1;
243 m_pCurTokenContentEnd
= m_pInputPos
;
// Closing parenthesis.
246 else if (cChar
== ')')
249 m_pCurTokenContentEnd
= m_pInputPos
;
// Backslash: may start the content, but does not move the content end here.
254 else if (cChar
== '\\')
256 if (!m_pCurTokenContentBegin
)
257 m_pCurTokenContentBegin
= m_pInputPos
- 1;
// Any character above space other than DEL extends the content.
260 else if (cChar
> ' ' && cChar
!= 0x7F) // DEL
262 if (!m_pCurTokenContentBegin
)
263 m_pCurTokenContentBegin
= m_pInputPos
- 1;
264 m_pCurTokenContentEnd
= m_pInputPos
;
// Remaining group (presumably the default case for atoms and single special
// characters): read a character and test whether it is above space and not
// DEL.
274 if (m_pInputPos
>= m_pInputEnd
)
276 cChar
= *m_pInputPos
++;
277 if (cChar
> ' ' && cChar
!= 0x7F) // DEL
// The token starts at the character just read.
280 m_pCurTokenBegin
= m_pInputPos
- 1;
// One of the listed special characters: the token ends right after it.
281 if (cChar
== '"' || cChar
== '(' || cChar
== ')' || cChar
== ','
282 || cChar
== '.' || cChar
== ':' || cChar
== ';'
283 || cChar
== '<' || cChar
== '>' || cChar
== '@'
284 || cChar
== '[' || cChar
== '\\' || cChar
== ']')
287 m_pCurTokenEnd
= m_pInputPos
;
// Scanning onwards: when the input is exhausted the token ends at the end of
// the input.
293 if (m_pInputPos
>= m_pInputEnd
)
295 m_pCurTokenEnd
= m_pInputPos
;
298 cChar
= *m_pInputPos
++;
// A character at or below space, a special character, or DEL ends the token:
// the read position is stepped back onto that character and the token ends
// in front of it.
299 if (cChar
<= ' ' || cChar
== '"' || cChar
== '('
300 || cChar
== ')' || cChar
== ',' || cChar
== '.'
301 || cChar
== ':' || cChar
== ';' || cChar
== '<'
302 || cChar
== '>' || cChar
== '@' || cChar
== '['
303 || cChar
== '\\' || cChar
== ']'
304 || cChar
== 0x7F) // DEL
306 m_pCurTokenEnd
= --m_pInputPos
;
// Builds a new string from the characters in [pBegin, pEnd), one character
// at a time, tracking a mode (eMode, starting as TOKEN_ATOM), an escape flag
// and whether the result currently ends with a space.
//   pBegin, pEnd  range of input characters to process
//   bAddrSpec     consulted in the whitespace and opening-parenthesis
//                 branches together with bEndsWithSpace
// Returns the accumulated buffer as an OUString.
// NOTE(review): many lines of this definition are missing from this excerpt
// (braces, switch and case labels, several conditions and statements), so
// the grouping comments below are inferred from the visible comparisons --
// confirm against the full file.
315 OUString
SvAddressParser_Impl::reparse(sal_Unicode
const * pBegin
,
316 sal_Unicode
const * pEnd
, bool bAddrSpec
)
318 OUStringBuffer aResult
;
319 TokenType eMode
= TOKEN_ATOM
;
320 bool bEscaped
= false;
321 bool bEndsWithSpace
= false;
// Consume the range one character at a time.
323 while (pBegin
< pEnd
)
325 sal_Unicode cChar
= *pBegin
++;
// Group that tests for a double quote and a backslash (presumably the
// TOKEN_QUOTED mode).
331 aResult
.append(cChar
);
334 else if (cChar
== '"')
337 aResult
.append(cChar
);
340 else if (cChar
== '\\')
343 aResult
.append(cChar
);
347 aResult
.append(cChar
);
// Group that tests for a closing square bracket and a backslash (presumably
// the TOKEN_DOMAIN mode).
353 aResult
.append(cChar
);
356 else if (cChar
== ']')
358 aResult
.append(cChar
);
361 else if (cChar
== '\\')
364 aResult
.append(cChar
);
368 aResult
.append(cChar
);
// Group that tests for parentheses and a backslash (presumably the
// TOKEN_COMMENT mode); no append is visible in this group.
374 else if (cChar
== '(')
376 else if (cChar
== ')')
381 else if (cChar
== '\\')
// Group handling characters at or below space and DEL (presumably the
// TOKEN_ATOM mode). The check on bAddrSpec and bEndsWithSpace is followed by
// setting bEndsWithSpace; the statement in between is not part of this
// excerpt.
386 if (cChar
<= ' ' || cChar
== 0x7F) // DEL
388 if (!bAddrSpec
&& !bEndsWithSpace
)
391 bEndsWithSpace
= true;
// Opening parenthesis: same space bookkeeping, then switch to comment mode.
394 else if (cChar
== '(')
396 if (!bAddrSpec
&& !bEndsWithSpace
)
399 bEndsWithSpace
= true;
401 eMode
= TOKEN_COMMENT
;
405 bEndsWithSpace
= false;
409 aResult
.append(cChar
);
410 eMode
= TOKEN_QUOTED
;
// NOTE(review): an opening square bracket switches to TOKEN_QUOTED here,
// although a separate group above reacts to the closing bracket; possibly
// TOKEN_DOMAIN was intended -- confirm before changing.
412 else if (cChar
== '[')
414 aResult
.append(cChar
);
415 eMode
= TOKEN_QUOTED
;
418 aResult
.append(cChar
);
423 return aResult
.makeStringAndClear();
// Builds a new string from the characters in [pBegin, pEnd) of a comment and
// returns it as an OUString.
// NOTE(review): the embedded numbering jumps from 433 to 436, so the append
// below may be guarded by a condition that is not part of this excerpt (the
// braces are missing as well) -- confirm against the full file.
427 OUString
SvAddressParser_Impl::reparseComment(sal_Unicode
const * pBegin
,
428 sal_Unicode
const * pEnd
)
430 OUStringBuffer aResult
;
// Consume the range one character at a time.
431 while (pBegin
< pEnd
)
433 sal_Unicode cChar
= *pBegin
++;
436 aResult
.append(cChar
);
438 return aResult
.makeStringAndClear();
// Parses rInput and appends each extracted address spec to
// pParser->m_vAddresses.
//   pParser  parser object that receives the results
//   rInput   the string to parse; m_pInputPos and m_pInputEnd point into its
//            buffer
// NOTE(review): many lines of this definition are missing from this excerpt
// (braces, the token loop, switch and case labels, else keywords, the tail
// of some calls). The comments below describe the visible statements only;
// which token triggers which group is not visible -- confirm against the
// full file.
441 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser
* pParser
,
442 const OUString
& rInput
)
443 : m_pCurTokenBegin(nullptr)
444 , m_pCurTokenEnd(nullptr)
445 , m_pCurTokenContentBegin(nullptr)
446 , m_pCurTokenContentEnd(nullptr)
// The input range is the character buffer of rInput.
448 m_pInputPos
= rInput
.getStr();
449 m_pInputEnd
= m_pInputPos
+ rInput
.getLength();
457 m_bRealNameFinished
= true;
458 if (m_eState
== AFTER_LESS
)
// Group that adds the token to the address spec as an item and feeds the
// real name from the token content. The spec is restarted first when an
// at-sign was already found or the last element is at most ELEMENT_DELIM.
469 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
471 if (m_pAddrSpec
->m_bAtFound
472 || m_pAddrSpec
->m_eLastElem
<= ELEMENT_DELIM
)
473 m_pAddrSpec
->reset();
474 addTokenToAddrSpec(ELEMENT_ITEM
);
476 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
)
// A token flagged for reparsing forces the real name to be reparsed too.
478 if (m_bCurTokenReparse
)
480 if (!m_pRealNameBegin
)
481 m_pRealNameBegin
= m_pCurTokenBegin
;
482 m_pRealNameEnd
= m_pCurTokenEnd
;
483 m_bRealNameReparse
= true;
485 else if (m_bRealNameReparse
)
486 m_pRealNameEnd
= m_pCurTokenEnd
;
// First token of the real name: the content range is taken from the token
// content range.
487 else if (!m_pRealNameBegin
)
489 m_pRealNameBegin
= m_pCurTokenBegin
;
490 m_pRealNameContentBegin
= m_pCurTokenContentBegin
;
491 m_pRealNameEnd
= m_pRealNameContentEnd
= m_pCurTokenContentEnd
;
495 m_pRealNameEnd
= m_pCurTokenEnd
;
496 m_bRealNameReparse
= true;
499 m_eType
= TOKEN_ATOM
;
// Group that accepts the token as an item only directly after the at-sign
// delimiter.
503 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
505 if (m_pAddrSpec
->m_bAtFound
&& m_pAddrSpec
->m_eLastElem
== ELEMENT_DELIM
)
506 addTokenToAddrSpec(ELEMENT_ITEM
);
508 m_pAddrSpec
->reset();
510 addTokenToRealName();
511 m_eType
= TOKEN_ATOM
;
// Group that remembers the content range of the first comment seen while
// the real name is still open.
515 if (!m_bRealNameFinished
&& m_eState
!= AFTER_LESS
516 && !m_pFirstCommentBegin
&& m_pCurTokenContentBegin
)
518 m_pFirstCommentBegin
= m_pCurTokenContentBegin
;
519 m_pFirstCommentEnd
= m_pCurTokenContentEnd
;
520 m_bFirstCommentReparse
= m_bCurTokenReparse
;
522 m_eType
= TOKEN_ATOM
;
// Group that adds the token as an item; the spec is restarted first unless
// the last element was a delimiter.
526 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
528 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
529 m_pAddrSpec
->reset();
530 addTokenToAddrSpec(ELEMENT_ITEM
);
532 addTokenToRealName();
// The next token is to be read as a comment.
536 m_eType
= TOKEN_COMMENT
;
542 m_pAddrSpec
->finish();
543 addTokenToRealName();
// Group that switches to the inner address spec and enters AFTER_LESS; a
// real name that has been started is closed.
551 m_aOuterAddrSpec
.finish();
552 if (m_pRealNameBegin
)
553 m_bRealNameFinished
= true;
554 m_pAddrSpec
= &m_aInnerAddrSpec
;
555 m_eState
= AFTER_LESS
;
559 m_aInnerAddrSpec
.finish();
563 m_aOuterAddrSpec
.finish();
564 addTokenToRealName();
// Group that leaves AFTER_LESS: the inner spec is finished, a valid inner
// spec also marks the outer one as ended, and parsing continues on the
// outer spec in state AFTER_GREATER.
570 if (m_eState
== AFTER_LESS
)
572 m_aInnerAddrSpec
.finish();
573 if (m_aInnerAddrSpec
.isValid())
574 m_aOuterAddrSpec
.m_eLastElem
= ELEMENT_END
;
575 m_pAddrSpec
= &m_aOuterAddrSpec
;
576 m_eState
= AFTER_GREATER
;
580 m_aOuterAddrSpec
.finish();
581 addTokenToRealName();
// Group that records the at-sign: it is accepted as a delimiter only
// directly after an item and only when none has been found yet.
586 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
588 if (!m_pAddrSpec
->m_bAtFound
589 && m_pAddrSpec
->m_eLastElem
== ELEMENT_ITEM
)
591 addTokenToAddrSpec(ELEMENT_DELIM
);
592 m_pAddrSpec
->m_bAtFound
= true;
595 m_pAddrSpec
->reset();
597 addTokenToRealName();
// Group that emits a result. Inside AFTER_LESS a comma only restarts an
// unfinished inner spec.
602 if (m_eState
== AFTER_LESS
)
603 if (m_nCurToken
== ',')
605 if (m_aInnerAddrSpec
.m_eLastElem
!= ELEMENT_END
)
606 m_aInnerAddrSpec
.reset();
609 m_aInnerAddrSpec
.finish();
// Choose the spec to emit: the inner one when it is valid, or when the
// outer one is not valid and the inner one is at least poorly valid;
// otherwise the outer one when it is poorly valid; otherwise none.
612 if(m_aInnerAddrSpec
.isValid() || (!m_aOuterAddrSpec
.isValid() && m_aInnerAddrSpec
.isPoorlyValid()))
614 m_pAddrSpec
= &m_aInnerAddrSpec
;
616 else if(m_aOuterAddrSpec
.isPoorlyValid())
618 m_pAddrSpec
= &m_aOuterAddrSpec
;
622 m_pAddrSpec
= nullptr;
// NOTE(review): m_pAddrSpec is dereferenced below although it may have just
// been set to nullptr; presumably a null check sits on a line missing from
// this excerpt -- confirm.
// The address spec text is either reparsed, or taken verbatim from rInput
// (the whole string when the lengths match, a substring otherwise).
627 OUString aTheAddrSpec
;
628 if (m_pAddrSpec
->m_bReparse
)
629 aTheAddrSpec
= reparse(m_pAddrSpec
->m_pBegin
, m_pAddrSpec
->m_pEnd
, true);
632 sal_Int32 nLen
= m_pAddrSpec
->m_pEnd
- m_pAddrSpec
->m_pBegin
;
633 if (nLen
== rInput
.getLength())
634 aTheAddrSpec
= rInput
;
636 aTheAddrSpec
= rInput
.copy( (m_pAddrSpec
->m_pBegin
- rInput
.getStr()),
// The real name: when there is none, or when it coincides exactly with the
// outer address spec and a first comment exists, fall back to the first
// comment (or to the address spec when there is no comment either).
639 OUString aTheRealName
;
640 if (!m_pRealNameBegin
||
641 (m_pAddrSpec
== &m_aOuterAddrSpec
&&
642 m_pRealNameBegin
== m_aOuterAddrSpec
.m_pBegin
&&
643 m_pRealNameEnd
== m_aOuterAddrSpec
.m_pEnd
&&
644 m_pFirstCommentBegin
))
646 if (!m_pFirstCommentBegin
)
647 aTheRealName
= aTheAddrSpec
;
648 else if (m_bFirstCommentReparse
)
649 aTheRealName
= reparseComment(m_pFirstCommentBegin
,
652 aTheRealName
= rInput
.copy( (m_pFirstCommentBegin
- rInput
.getStr()),
653 (m_pFirstCommentEnd
- m_pFirstCommentBegin
));
// Otherwise the real name is reparsed, or copied verbatim from its content
// range in rInput.
655 else if (m_bRealNameReparse
)
656 aTheRealName
= reparse(m_pRealNameBegin
, m_pRealNameEnd
, false);
659 sal_Int32 nLen
= m_pRealNameContentEnd
- m_pRealNameContentBegin
;
660 if (nLen
== rInput
.getLength())
661 aTheRealName
= rInput
;
663 aTheRealName
= rInput
.copy( (m_pRealNameContentBegin
- rInput
.getStr()), nLen
);
// Store the result in the owning parser.
// NOTE(review): only aTheAddrSpec is passed on in the visible lines;
// whether aTheRealName is used on a missing line is not visible -- confirm.
665 pParser
->m_vAddresses
.emplace_back( aTheAddrSpec
);
// Start over for the next address: outer spec and real name are cleared and
// the state becomes BEFORE_LESS.
677 m_aOuterAddrSpec
.reset();
678 resetRealNameAndFirstComment();
679 m_eState
= BEFORE_LESS
;
684 m_aOuterAddrSpec
.finish();
685 addTokenToRealName();
689 m_aInnerAddrSpec
.reset();
// The next token is to be read as a quoted token.
695 m_eType
= TOKEN_QUOTED
;
// Group that adds the token as a delimiter unless the last element already
// was a delimiter.
699 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_END
)
701 if (m_pAddrSpec
->m_eLastElem
!= ELEMENT_DELIM
)
702 addTokenToAddrSpec(ELEMENT_DELIM
);
704 m_pAddrSpec
->reset();
706 addTokenToRealName();
// The next token is to be read as a domain token.
710 m_eType
= TOKEN_DOMAIN
;
// Parses rInput by constructing a local SvAddressParser_Impl with this
// object and the input; the constructor of that helper does the parsing.
// NOTE(review): the braces of this definition are missing from this excerpt.
716 SvAddressParser::SvAddressParser(const OUString
& rInput
)
718 SvAddressParser_Impl
aDoParse(this, rInput
);
// Destructor of SvAddressParser.
// NOTE(review): the body of this definition is not part of this excerpt.
721 SvAddressParser::~SvAddressParser()
725 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */