nss: upgrade to release 3.73
[LibreOffice.git] / svl / source / misc / adrparse.cxx
blob16b3aa4c7196ab69f839aefa5b549d5b074f8359
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ustrbuf.hxx>
21 #include <svl/adrparse.hxx>
23 namespace
26 enum ElementType { ELEMENT_START, ELEMENT_DELIM, ELEMENT_ITEM, ELEMENT_END };
28 struct ParsedAddrSpec
30 sal_Unicode const * m_pBegin;
31 sal_Unicode const * m_pEnd;
32 ElementType m_eLastElem;
33 bool m_bAtFound;
34 bool m_bReparse;
36 ParsedAddrSpec() { reset(); }
38 bool isPoorlyValid() const { return m_eLastElem >= ELEMENT_ITEM; }
40 bool isValid() const { return isPoorlyValid() && m_bAtFound; }
42 void reset();
44 void finish();
47 void ParsedAddrSpec::reset()
49 m_pBegin = nullptr;
50 m_pEnd = nullptr;
51 m_eLastElem = ELEMENT_START;
52 m_bAtFound = false;
53 m_bReparse = false;
56 void ParsedAddrSpec::finish()
58 if (isPoorlyValid())
59 m_eLastElem = ELEMENT_END;
60 else
61 reset();
66 class SvAddressParser_Impl
68 enum State { BEFORE_COLON, BEFORE_LESS, AFTER_LESS, AFTER_GREATER };
70 enum TokenType: sal_uInt32 {
71 TOKEN_QUOTED = 0x80000000, TOKEN_DOMAIN, TOKEN_COMMENT, TOKEN_ATOM };
73 sal_Unicode const * m_pInputPos;
74 sal_Unicode const * m_pInputEnd;
75 sal_uInt32 m_nCurToken;
76 sal_Unicode const * m_pCurTokenBegin;
77 sal_Unicode const * m_pCurTokenEnd;
78 sal_Unicode const * m_pCurTokenContentBegin;
79 sal_Unicode const * m_pCurTokenContentEnd;
80 bool m_bCurTokenReparse;
81 ParsedAddrSpec m_aOuterAddrSpec;
82 ParsedAddrSpec m_aInnerAddrSpec;
83 ParsedAddrSpec * m_pAddrSpec;
84 sal_Unicode const * m_pRealNameBegin;
85 sal_Unicode const * m_pRealNameEnd;
86 sal_Unicode const * m_pRealNameContentBegin;
87 sal_Unicode const * m_pRealNameContentEnd;
88 bool m_bRealNameReparse;
89 bool m_bRealNameFinished;
90 sal_Unicode const * m_pFirstCommentBegin;
91 sal_Unicode const * m_pFirstCommentEnd;
92 bool m_bFirstCommentReparse;
93 State m_eState;
94 TokenType m_eType;
96 inline void resetRealNameAndFirstComment();
98 inline void reset();
100 void addTokenToAddrSpec(ElementType eTokenElem);
102 inline void addTokenToRealName();
104 bool readToken();
106 static OUString reparse(sal_Unicode const * pBegin,
107 sal_Unicode const * pEnd, bool bAddrSpec);
109 static OUString reparseComment(sal_Unicode const * pBegin,
110 sal_Unicode const * pEnd);
112 public:
113 SvAddressParser_Impl(SvAddressParser * pParser, const OUString& rIn);
116 inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
118 m_pRealNameBegin = nullptr;
119 m_pRealNameEnd = nullptr;
120 m_pRealNameContentBegin = nullptr;
121 m_pRealNameContentEnd = nullptr;
122 m_bRealNameReparse = false;
123 m_bRealNameFinished = false;
124 m_pFirstCommentBegin = nullptr;
125 m_pFirstCommentEnd = nullptr;
126 m_bFirstCommentReparse = false;
129 inline void SvAddressParser_Impl::reset()
131 m_aOuterAddrSpec.reset();
132 m_aInnerAddrSpec.reset();
133 m_pAddrSpec = &m_aOuterAddrSpec;
134 resetRealNameAndFirstComment();
135 m_eState = BEFORE_COLON;
136 m_eType = TOKEN_ATOM;
139 void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem)
141 if (!m_pAddrSpec->m_pBegin)
142 m_pAddrSpec->m_pBegin = m_pCurTokenBegin;
143 else if (m_pAddrSpec->m_pEnd < m_pCurTokenBegin)
144 m_pAddrSpec->m_bReparse = true;
145 m_pAddrSpec->m_pEnd = m_pCurTokenEnd;
146 m_pAddrSpec->m_eLastElem = eTokenElem;
149 inline void SvAddressParser_Impl::addTokenToRealName()
151 if (!m_bRealNameFinished && m_eState != AFTER_LESS)
153 if (!m_pRealNameBegin)
154 m_pRealNameBegin = m_pRealNameContentBegin = m_pCurTokenBegin;
155 else if (m_pRealNameEnd < m_pCurTokenBegin - 1
156 || (m_pRealNameEnd == m_pCurTokenBegin - 1
157 && *m_pRealNameEnd != ' '))
158 m_bRealNameReparse = true;
159 m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenEnd;
164 // SvAddressParser_Impl
167 bool SvAddressParser_Impl::readToken()
169 m_nCurToken = m_eType;
170 m_bCurTokenReparse = false;
171 switch (m_eType)
173 case TOKEN_QUOTED:
175 m_pCurTokenBegin = m_pInputPos - 1;
176 m_pCurTokenContentBegin = m_pInputPos;
177 bool bEscaped = false;
178 for (;;)
180 if (m_pInputPos >= m_pInputEnd)
181 return false;
182 sal_Unicode cChar = *m_pInputPos++;
183 if (bEscaped)
185 m_bCurTokenReparse = true;
186 bEscaped = false;
188 else if (cChar == '"')
190 m_pCurTokenEnd = m_pInputPos;
191 m_pCurTokenContentEnd = m_pInputPos - 1;
192 return true;
194 else if (cChar == '\\')
195 bEscaped = true;
199 case TOKEN_DOMAIN:
201 m_pCurTokenBegin = m_pInputPos - 1;
202 m_pCurTokenContentBegin = m_pInputPos;
203 bool bEscaped = false;
204 for (;;)
206 if (m_pInputPos >= m_pInputEnd)
207 return false;
208 sal_Unicode cChar = *m_pInputPos++;
209 if (bEscaped)
210 bEscaped = false;
211 else if (cChar == ']')
213 m_pCurTokenEnd = m_pInputPos;
214 return true;
216 else if (cChar == '\\')
217 bEscaped = true;
221 case TOKEN_COMMENT:
223 m_pCurTokenBegin = m_pInputPos - 1;
224 m_pCurTokenContentBegin = nullptr;
225 m_pCurTokenContentEnd = nullptr;
226 bool bEscaped = false;
227 int nLevel = 0;
228 for (;;)
230 if (m_pInputPos >= m_pInputEnd)
231 return false;
232 sal_Unicode cChar = *m_pInputPos++;
233 if (bEscaped)
235 m_bCurTokenReparse = true;
236 m_pCurTokenContentEnd = m_pInputPos;
237 bEscaped = false;
239 else if (cChar == '(')
241 if (!m_pCurTokenContentBegin)
242 m_pCurTokenContentBegin = m_pInputPos - 1;
243 m_pCurTokenContentEnd = m_pInputPos;
244 ++nLevel;
246 else if (cChar == ')')
247 if (nLevel)
249 m_pCurTokenContentEnd = m_pInputPos;
250 --nLevel;
252 else
253 return true;
254 else if (cChar == '\\')
256 if (!m_pCurTokenContentBegin)
257 m_pCurTokenContentBegin = m_pInputPos - 1;
258 bEscaped = true;
260 else if (cChar > ' ' && cChar != 0x7F) // DEL
262 if (!m_pCurTokenContentBegin)
263 m_pCurTokenContentBegin = m_pInputPos - 1;
264 m_pCurTokenContentEnd = m_pInputPos;
269 default:
271 sal_Unicode cChar;
272 for (;;)
274 if (m_pInputPos >= m_pInputEnd)
275 return false;
276 cChar = *m_pInputPos++;
277 if (cChar > ' ' && cChar != 0x7F) // DEL
278 break;
280 m_pCurTokenBegin = m_pInputPos - 1;
281 if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
282 || cChar == '.' || cChar == ':' || cChar == ';'
283 || cChar == '<' || cChar == '>' || cChar == '@'
284 || cChar == '[' || cChar == '\\' || cChar == ']')
286 m_nCurToken = cChar;
287 m_pCurTokenEnd = m_pInputPos;
288 return true;
290 else
291 for (;;)
293 if (m_pInputPos >= m_pInputEnd)
295 m_pCurTokenEnd = m_pInputPos;
296 return true;
298 cChar = *m_pInputPos++;
299 if (cChar <= ' ' || cChar == '"' || cChar == '('
300 || cChar == ')' || cChar == ',' || cChar == '.'
301 || cChar == ':' || cChar == ';' || cChar == '<'
302 || cChar == '>' || cChar == '@' || cChar == '['
303 || cChar == '\\' || cChar == ']'
304 || cChar == 0x7F) // DEL
306 m_pCurTokenEnd = --m_pInputPos;
307 return true;
314 // static
315 OUString SvAddressParser_Impl::reparse(sal_Unicode const * pBegin,
316 sal_Unicode const * pEnd, bool bAddrSpec)
318 OUStringBuffer aResult;
319 TokenType eMode = TOKEN_ATOM;
320 bool bEscaped = false;
321 bool bEndsWithSpace = false;
322 int nLevel = 0;
323 while (pBegin < pEnd)
325 sal_Unicode cChar = *pBegin++;
326 switch (eMode)
328 case TOKEN_QUOTED:
329 if (bEscaped)
331 aResult.append(cChar);
332 bEscaped = false;
334 else if (cChar == '"')
336 if (bAddrSpec)
337 aResult.append(cChar);
338 eMode = TOKEN_ATOM;
340 else if (cChar == '\\')
342 if (bAddrSpec)
343 aResult.append(cChar);
344 bEscaped = true;
346 else
347 aResult.append(cChar);
348 break;
350 case TOKEN_DOMAIN:
351 if (bEscaped)
353 aResult.append(cChar);
354 bEscaped = false;
356 else if (cChar == ']')
358 aResult.append(cChar);
359 eMode = TOKEN_ATOM;
361 else if (cChar == '\\')
363 if (bAddrSpec)
364 aResult.append(cChar);
365 bEscaped = true;
367 else
368 aResult.append(cChar);
369 break;
371 case TOKEN_COMMENT:
372 if (bEscaped)
373 bEscaped = false;
374 else if (cChar == '(')
375 ++nLevel;
376 else if (cChar == ')')
377 if (nLevel)
378 --nLevel;
379 else
380 eMode = TOKEN_ATOM;
381 else if (cChar == '\\')
382 bEscaped = true;
383 break;
385 case TOKEN_ATOM:
386 if (cChar <= ' ' || cChar == 0x7F) // DEL
388 if (!bAddrSpec && !bEndsWithSpace)
390 aResult.append(' ');
391 bEndsWithSpace = true;
394 else if (cChar == '(')
396 if (!bAddrSpec && !bEndsWithSpace)
398 aResult.append(' ');
399 bEndsWithSpace = true;
401 eMode = TOKEN_COMMENT;
403 else
405 bEndsWithSpace = false;
406 if (cChar == '"')
408 if (bAddrSpec)
409 aResult.append(cChar);
410 eMode = TOKEN_QUOTED;
412 else if (cChar == '[')
414 aResult.append(cChar);
415 eMode = TOKEN_QUOTED;
417 else
418 aResult.append(cChar);
420 break;
423 return aResult.makeStringAndClear();
426 // static
427 OUString SvAddressParser_Impl::reparseComment(sal_Unicode const * pBegin,
428 sal_Unicode const * pEnd)
430 OUStringBuffer aResult;
431 while (pBegin < pEnd)
433 sal_Unicode cChar = *pBegin++;
434 if (cChar == '\\')
435 cChar = *pBegin++;
436 aResult.append(cChar);
438 return aResult.makeStringAndClear();
441 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser * pParser,
442 const OUString& rInput)
443 : m_pCurTokenBegin(nullptr)
444 , m_pCurTokenEnd(nullptr)
445 , m_pCurTokenContentBegin(nullptr)
446 , m_pCurTokenContentEnd(nullptr)
448 m_pInputPos = rInput.getStr();
449 m_pInputEnd = m_pInputPos + rInput.getLength();
451 reset();
452 bool bDone = false;
453 for (;;)
455 if (!readToken())
457 m_bRealNameFinished = true;
458 if (m_eState == AFTER_LESS)
459 m_nCurToken = '>';
460 else
462 m_nCurToken = ',';
463 bDone = true;
466 switch (m_nCurToken)
468 case TOKEN_QUOTED:
469 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
471 if (m_pAddrSpec->m_bAtFound
472 || m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
473 m_pAddrSpec->reset();
474 addTokenToAddrSpec(ELEMENT_ITEM);
476 if (!m_bRealNameFinished && m_eState != AFTER_LESS)
478 if (m_bCurTokenReparse)
480 if (!m_pRealNameBegin)
481 m_pRealNameBegin = m_pCurTokenBegin;
482 m_pRealNameEnd = m_pCurTokenEnd;
483 m_bRealNameReparse = true;
485 else if (m_bRealNameReparse)
486 m_pRealNameEnd = m_pCurTokenEnd;
487 else if (!m_pRealNameBegin)
489 m_pRealNameBegin = m_pCurTokenBegin;
490 m_pRealNameContentBegin = m_pCurTokenContentBegin;
491 m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenContentEnd;
493 else
495 m_pRealNameEnd = m_pCurTokenEnd;
496 m_bRealNameReparse = true;
499 m_eType = TOKEN_ATOM;
500 break;
502 case TOKEN_DOMAIN:
503 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
505 if (m_pAddrSpec->m_bAtFound && m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
506 addTokenToAddrSpec(ELEMENT_ITEM);
507 else
508 m_pAddrSpec->reset();
510 addTokenToRealName();
511 m_eType = TOKEN_ATOM;
512 break;
514 case TOKEN_COMMENT:
515 if (!m_bRealNameFinished && m_eState != AFTER_LESS
516 && !m_pFirstCommentBegin && m_pCurTokenContentBegin)
518 m_pFirstCommentBegin = m_pCurTokenContentBegin;
519 m_pFirstCommentEnd = m_pCurTokenContentEnd;
520 m_bFirstCommentReparse = m_bCurTokenReparse;
522 m_eType = TOKEN_ATOM;
523 break;
525 case TOKEN_ATOM:
526 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
528 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
529 m_pAddrSpec->reset();
530 addTokenToAddrSpec(ELEMENT_ITEM);
532 addTokenToRealName();
533 break;
535 case '(':
536 m_eType = TOKEN_COMMENT;
537 break;
539 case ')':
540 case '\\':
541 case ']':
542 m_pAddrSpec->finish();
543 addTokenToRealName();
544 break;
546 case '<':
547 switch (m_eState)
549 case BEFORE_COLON:
550 case BEFORE_LESS:
551 m_aOuterAddrSpec.finish();
552 if (m_pRealNameBegin)
553 m_bRealNameFinished = true;
554 m_pAddrSpec = &m_aInnerAddrSpec;
555 m_eState = AFTER_LESS;
556 break;
558 case AFTER_LESS:
559 m_aInnerAddrSpec.finish();
560 break;
562 case AFTER_GREATER:
563 m_aOuterAddrSpec.finish();
564 addTokenToRealName();
565 break;
567 break;
569 case '>':
570 if (m_eState == AFTER_LESS)
572 m_aInnerAddrSpec.finish();
573 if (m_aInnerAddrSpec.isValid())
574 m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
575 m_pAddrSpec = &m_aOuterAddrSpec;
576 m_eState = AFTER_GREATER;
578 else
580 m_aOuterAddrSpec.finish();
581 addTokenToRealName();
583 break;
585 case '@':
586 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
588 if (!m_pAddrSpec->m_bAtFound
589 && m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
591 addTokenToAddrSpec(ELEMENT_DELIM);
592 m_pAddrSpec->m_bAtFound = true;
594 else
595 m_pAddrSpec->reset();
597 addTokenToRealName();
598 break;
600 case ',':
601 case ';':
602 if (m_eState == AFTER_LESS)
603 if (m_nCurToken == ',')
605 if (m_aInnerAddrSpec.m_eLastElem != ELEMENT_END)
606 m_aInnerAddrSpec.reset();
608 else
609 m_aInnerAddrSpec.finish();
610 else
612 if(m_aInnerAddrSpec.isValid() || (!m_aOuterAddrSpec.isValid() && m_aInnerAddrSpec.isPoorlyValid()))
614 m_pAddrSpec = &m_aInnerAddrSpec;
616 else if(m_aOuterAddrSpec.isPoorlyValid())
618 m_pAddrSpec = &m_aOuterAddrSpec;
620 else
622 m_pAddrSpec = nullptr;
625 if (m_pAddrSpec)
627 OUString aTheAddrSpec;
628 if (m_pAddrSpec->m_bReparse)
629 aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin, m_pAddrSpec->m_pEnd, true);
630 else
632 sal_Int32 nLen = m_pAddrSpec->m_pEnd - m_pAddrSpec->m_pBegin;
633 if (nLen == rInput.getLength())
634 aTheAddrSpec = rInput;
635 else
636 aTheAddrSpec = rInput.copy( (m_pAddrSpec->m_pBegin - rInput.getStr()),
637 nLen);
639 OUString aTheRealName;
640 if (!m_pRealNameBegin ||
641 (m_pAddrSpec == &m_aOuterAddrSpec &&
642 m_pRealNameBegin == m_aOuterAddrSpec.m_pBegin &&
643 m_pRealNameEnd == m_aOuterAddrSpec.m_pEnd &&
644 m_pFirstCommentBegin))
646 if (!m_pFirstCommentBegin)
647 aTheRealName = aTheAddrSpec;
648 else if (m_bFirstCommentReparse)
649 aTheRealName = reparseComment(m_pFirstCommentBegin,
650 m_pFirstCommentEnd);
651 else
652 aTheRealName = rInput.copy( (m_pFirstCommentBegin - rInput.getStr()),
653 (m_pFirstCommentEnd - m_pFirstCommentBegin));
655 else if (m_bRealNameReparse)
656 aTheRealName = reparse(m_pRealNameBegin, m_pRealNameEnd, false);
657 else
659 sal_Int32 nLen = m_pRealNameContentEnd - m_pRealNameContentBegin;
660 if (nLen == rInput.getLength())
661 aTheRealName = rInput;
662 else
663 aTheRealName = rInput.copy( (m_pRealNameContentBegin - rInput.getStr()), nLen);
665 pParser->m_vAddresses.emplace_back( aTheAddrSpec );
667 if (bDone)
668 return;
669 reset();
671 break;
673 case ':':
674 switch (m_eState)
676 case BEFORE_COLON:
677 m_aOuterAddrSpec.reset();
678 resetRealNameAndFirstComment();
679 m_eState = BEFORE_LESS;
680 break;
682 case BEFORE_LESS:
683 case AFTER_GREATER:
684 m_aOuterAddrSpec.finish();
685 addTokenToRealName();
686 break;
688 case AFTER_LESS:
689 m_aInnerAddrSpec.reset();
690 break;
692 break;
694 case '"':
695 m_eType = TOKEN_QUOTED;
696 break;
698 case '.':
699 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
701 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
702 addTokenToAddrSpec(ELEMENT_DELIM);
703 else
704 m_pAddrSpec->reset();
706 addTokenToRealName();
707 break;
709 case '[':
710 m_eType = TOKEN_DOMAIN;
711 break;
716 SvAddressParser::SvAddressParser(const OUString& rInput)
718 SvAddressParser_Impl aDoParse(this, rInput);
721 SvAddressParser::~SvAddressParser()
725 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */