Update git submodules
[LibreOffice.git] / svl / source / misc / adrparse.cxx
blob19e869a09289bb83bbdef2056fe949ea299ed6d6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <rtl/ustrbuf.hxx>
21 #include <svl/adrparse.hxx>
23 namespace
26 enum ElementType { ELEMENT_START, ELEMENT_DELIM, ELEMENT_ITEM, ELEMENT_END };
28 struct ParsedAddrSpec
30 sal_Unicode const * m_pBegin;
31 sal_Unicode const * m_pEnd;
32 ElementType m_eLastElem;
33 bool m_bAtFound;
34 bool m_bReparse;
36 ParsedAddrSpec() { reset(); }
38 bool isPoorlyValid() const { return m_eLastElem >= ELEMENT_ITEM; }
40 bool isValid() const { return isPoorlyValid() && m_bAtFound; }
42 void reset();
44 void finish();
47 void ParsedAddrSpec::reset()
49 m_pBegin = nullptr;
50 m_pEnd = nullptr;
51 m_eLastElem = ELEMENT_START;
52 m_bAtFound = false;
53 m_bReparse = false;
56 void ParsedAddrSpec::finish()
58 if (isPoorlyValid())
59 m_eLastElem = ELEMENT_END;
60 else
61 reset();
66 class SvAddressParser_Impl
68 enum State { BEFORE_COLON, BEFORE_LESS, AFTER_LESS, AFTER_GREATER };
70 enum TokenType: sal_uInt32 {
71 TOKEN_QUOTED = 0x80000000, TOKEN_DOMAIN, TOKEN_COMMENT, TOKEN_ATOM };
73 sal_Unicode const * m_pInputPos;
74 sal_Unicode const * m_pInputEnd;
75 sal_uInt32 m_nCurToken;
76 sal_Unicode const * m_pCurTokenBegin;
77 sal_Unicode const * m_pCurTokenEnd;
78 ParsedAddrSpec m_aOuterAddrSpec;
79 ParsedAddrSpec m_aInnerAddrSpec;
80 ParsedAddrSpec * m_pAddrSpec;
81 State m_eState;
82 TokenType m_eType;
84 inline void reset();
86 void addTokenToAddrSpec(ElementType eTokenElem);
88 bool readToken();
90 static OUString reparse(sal_Unicode const * pBegin,
91 sal_Unicode const * pEnd);
93 public:
94 SvAddressParser_Impl(SvAddressParser * pParser, const OUString& rIn);
97 inline void SvAddressParser_Impl::reset()
99 m_aOuterAddrSpec.reset();
100 m_aInnerAddrSpec.reset();
101 m_pAddrSpec = &m_aOuterAddrSpec;
102 m_eState = BEFORE_COLON;
103 m_eType = TOKEN_ATOM;
106 void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem)
108 if (!m_pAddrSpec->m_pBegin)
109 m_pAddrSpec->m_pBegin = m_pCurTokenBegin;
110 else if (m_pAddrSpec->m_pEnd < m_pCurTokenBegin)
111 m_pAddrSpec->m_bReparse = true;
112 m_pAddrSpec->m_pEnd = m_pCurTokenEnd;
113 m_pAddrSpec->m_eLastElem = eTokenElem;
117 // SvAddressParser_Impl
120 bool SvAddressParser_Impl::readToken()
122 m_nCurToken = m_eType;
123 switch (m_eType)
125 case TOKEN_QUOTED:
127 m_pCurTokenBegin = m_pInputPos - 1;
128 bool bEscaped = false;
129 for (;;)
131 if (m_pInputPos >= m_pInputEnd)
132 return false;
133 sal_Unicode cChar = *m_pInputPos++;
134 if (bEscaped)
136 bEscaped = false;
138 else if (cChar == '"')
140 m_pCurTokenEnd = m_pInputPos;
141 return true;
143 else if (cChar == '\\')
144 bEscaped = true;
148 case TOKEN_DOMAIN:
150 m_pCurTokenBegin = m_pInputPos - 1;
151 bool bEscaped = false;
152 for (;;)
154 if (m_pInputPos >= m_pInputEnd)
155 return false;
156 sal_Unicode cChar = *m_pInputPos++;
157 if (bEscaped)
158 bEscaped = false;
159 else if (cChar == ']')
161 m_pCurTokenEnd = m_pInputPos;
162 return true;
164 else if (cChar == '\\')
165 bEscaped = true;
169 case TOKEN_COMMENT:
171 m_pCurTokenBegin = m_pInputPos - 1;
172 bool bEscaped = false;
173 int nLevel = 0;
174 for (;;)
176 if (m_pInputPos >= m_pInputEnd)
177 return false;
178 sal_Unicode cChar = *m_pInputPos++;
179 if (bEscaped)
181 bEscaped = false;
183 else if (cChar == '(')
185 ++nLevel;
187 else if (cChar == ')')
188 if (nLevel)
190 --nLevel;
192 else
193 return true;
194 else if (cChar == '\\')
196 bEscaped = true;
201 default:
203 sal_Unicode cChar;
204 for (;;)
206 if (m_pInputPos >= m_pInputEnd)
207 return false;
208 cChar = *m_pInputPos++;
209 if (cChar > ' ' && cChar != 0x7F) // DEL
210 break;
212 m_pCurTokenBegin = m_pInputPos - 1;
213 if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
214 || cChar == '.' || cChar == ':' || cChar == ';'
215 || cChar == '<' || cChar == '>' || cChar == '@'
216 || cChar == '[' || cChar == '\\' || cChar == ']')
218 m_nCurToken = cChar;
219 m_pCurTokenEnd = m_pInputPos;
220 return true;
222 else
223 for (;;)
225 if (m_pInputPos >= m_pInputEnd)
227 m_pCurTokenEnd = m_pInputPos;
228 return true;
230 cChar = *m_pInputPos++;
231 if (cChar <= ' ' || cChar == '"' || cChar == '('
232 || cChar == ')' || cChar == ',' || cChar == '.'
233 || cChar == ':' || cChar == ';' || cChar == '<'
234 || cChar == '>' || cChar == '@' || cChar == '['
235 || cChar == '\\' || cChar == ']'
236 || cChar == 0x7F) // DEL
238 m_pCurTokenEnd = --m_pInputPos;
239 return true;
246 // static
247 OUString SvAddressParser_Impl::reparse(sal_Unicode const * pBegin,
248 sal_Unicode const * pEnd)
250 OUStringBuffer aResult;
251 TokenType eMode = TOKEN_ATOM;
252 bool bEscaped = false;
253 int nLevel = 0;
254 while (pBegin < pEnd)
256 sal_Unicode cChar = *pBegin++;
257 switch (eMode)
259 case TOKEN_QUOTED:
260 if (bEscaped)
262 aResult.append(cChar);
263 bEscaped = false;
265 else if (cChar == '"')
267 aResult.append(cChar);
268 eMode = TOKEN_ATOM;
270 else if (cChar == '\\')
272 aResult.append(cChar);
273 bEscaped = true;
275 else
276 aResult.append(cChar);
277 break;
279 case TOKEN_DOMAIN:
280 if (bEscaped)
282 aResult.append(cChar);
283 bEscaped = false;
285 else if (cChar == ']')
287 aResult.append(cChar);
288 eMode = TOKEN_ATOM;
290 else if (cChar == '\\')
292 aResult.append(cChar);
293 bEscaped = true;
295 else
296 aResult.append(cChar);
297 break;
299 case TOKEN_COMMENT:
300 if (bEscaped)
301 bEscaped = false;
302 else if (cChar == '(')
303 ++nLevel;
304 else if (cChar == ')')
305 if (nLevel)
306 --nLevel;
307 else
308 eMode = TOKEN_ATOM;
309 else if (cChar == '\\')
310 bEscaped = true;
311 break;
313 case TOKEN_ATOM:
314 if (cChar <= ' ' || cChar == 0x7F) // DEL
317 else if (cChar == '(')
319 eMode = TOKEN_COMMENT;
321 else
323 if (cChar == '"')
325 aResult.append(cChar);
326 eMode = TOKEN_QUOTED;
328 else if (cChar == '[')
330 aResult.append(cChar);
331 eMode = TOKEN_QUOTED;
333 else
334 aResult.append(cChar);
336 break;
339 return aResult.makeStringAndClear();
342 SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser * pParser,
343 const OUString& rInput)
344 : m_pCurTokenBegin(nullptr)
345 , m_pCurTokenEnd(nullptr)
347 m_pInputPos = rInput.getStr();
348 m_pInputEnd = m_pInputPos + rInput.getLength();
350 reset();
351 bool bDone = false;
352 for (;;)
354 if (!readToken())
356 if (m_eState == AFTER_LESS)
357 m_nCurToken = '>';
358 else
360 m_nCurToken = ',';
361 bDone = true;
364 switch (m_nCurToken)
366 case TOKEN_QUOTED:
367 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
369 if (m_pAddrSpec->m_bAtFound
370 || m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
371 m_pAddrSpec->reset();
372 addTokenToAddrSpec(ELEMENT_ITEM);
374 m_eType = TOKEN_ATOM;
375 break;
377 case TOKEN_DOMAIN:
378 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
380 if (m_pAddrSpec->m_bAtFound && m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
381 addTokenToAddrSpec(ELEMENT_ITEM);
382 else
383 m_pAddrSpec->reset();
385 m_eType = TOKEN_ATOM;
386 break;
388 case TOKEN_COMMENT:
389 m_eType = TOKEN_ATOM;
390 break;
392 case TOKEN_ATOM:
393 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
395 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
396 m_pAddrSpec->reset();
397 addTokenToAddrSpec(ELEMENT_ITEM);
399 break;
401 case '(':
402 m_eType = TOKEN_COMMENT;
403 break;
405 case ')':
406 case '\\':
407 case ']':
408 m_pAddrSpec->finish();
409 break;
411 case '<':
412 switch (m_eState)
414 case BEFORE_COLON:
415 case BEFORE_LESS:
416 m_aOuterAddrSpec.finish();
417 m_pAddrSpec = &m_aInnerAddrSpec;
418 m_eState = AFTER_LESS;
419 break;
421 case AFTER_LESS:
422 m_aInnerAddrSpec.finish();
423 break;
425 case AFTER_GREATER:
426 m_aOuterAddrSpec.finish();
427 break;
429 break;
431 case '>':
432 if (m_eState == AFTER_LESS)
434 m_aInnerAddrSpec.finish();
435 if (m_aInnerAddrSpec.isValid())
436 m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
437 m_pAddrSpec = &m_aOuterAddrSpec;
438 m_eState = AFTER_GREATER;
440 else
442 m_aOuterAddrSpec.finish();
444 break;
446 case '@':
447 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
449 if (!m_pAddrSpec->m_bAtFound
450 && m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
452 addTokenToAddrSpec(ELEMENT_DELIM);
453 m_pAddrSpec->m_bAtFound = true;
455 else
456 m_pAddrSpec->reset();
458 break;
460 case ',':
461 case ';':
462 if (m_eState == AFTER_LESS)
463 if (m_nCurToken == ',')
465 if (m_aInnerAddrSpec.m_eLastElem != ELEMENT_END)
466 m_aInnerAddrSpec.reset();
468 else
469 m_aInnerAddrSpec.finish();
470 else
472 if(m_aInnerAddrSpec.isValid() || (!m_aOuterAddrSpec.isValid() && m_aInnerAddrSpec.isPoorlyValid()))
474 m_pAddrSpec = &m_aInnerAddrSpec;
476 else if(m_aOuterAddrSpec.isPoorlyValid())
478 m_pAddrSpec = &m_aOuterAddrSpec;
480 else
482 m_pAddrSpec = nullptr;
485 if (m_pAddrSpec)
487 OUString aTheAddrSpec;
488 if (m_pAddrSpec->m_bReparse)
489 aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin, m_pAddrSpec->m_pEnd);
490 else
492 sal_Int32 nLen = m_pAddrSpec->m_pEnd - m_pAddrSpec->m_pBegin;
493 if (nLen == rInput.getLength())
494 aTheAddrSpec = rInput;
495 else
496 aTheAddrSpec = rInput.copy( (m_pAddrSpec->m_pBegin - rInput.getStr()),
497 nLen);
499 pParser->m_vAddresses.emplace_back( aTheAddrSpec );
501 if (bDone)
502 return;
503 reset();
505 break;
507 case ':':
508 switch (m_eState)
510 case BEFORE_COLON:
511 m_aOuterAddrSpec.reset();
512 m_eState = BEFORE_LESS;
513 break;
515 case BEFORE_LESS:
516 case AFTER_GREATER:
517 m_aOuterAddrSpec.finish();
518 break;
520 case AFTER_LESS:
521 m_aInnerAddrSpec.reset();
522 break;
524 break;
526 case '"':
527 m_eType = TOKEN_QUOTED;
528 break;
530 case '.':
531 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
533 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
534 addTokenToAddrSpec(ELEMENT_DELIM);
535 else
536 m_pAddrSpec->reset();
538 break;
540 case '[':
541 m_eType = TOKEN_DOMAIN;
542 break;
547 SvAddressParser::SvAddressParser(const OUString& rInput)
549 SvAddressParser_Impl aDoParse(this, rInput);
552 SvAddressParser::~SvAddressParser()
556 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */