Merge branch 'fixes' into main/rendor-staging
[ryzomcore.git] / web / public_php / login / email / RFC822.php
blobf30f674c9bde74042b6da8dc39fb5d8d1957fe25
1 <?php
2 /**
3 * RFC 822 Email address list validation Utility
5 * What is it?
7 * This class will take an address string, and parse it into its constituent
8 * parts, be that either addresses, groups, or combinations. Nested groups
9 * are not supported. The structure it returns is pretty straight forward,
10 * and is similar to that provided by the imap_rfc822_parse_adrlist(). Use
11 * print_r() to view the structure.
13 * How do I use it?
15 * $address_string = 'My Group: "Richard Heyes" <richard@localhost> (A comment), ted@example.com (Ted Bloggs), Barney;';
16 * $structure = Mail_RFC822::parseAddressList($address_string, 'example.com', TRUE)
17 * print_r($structure);
19 * @author Richard Heyes <richard@phpguru.org>
20 * @author Chuck Hagenbuch <chuck@horde.org>
21 * @version $Revision: 1.1 $
22 * @package Mail
25 class Mail_RFC822
/** @var string The address string currently being parsed. */
public $address = '';

/** @var string Domain applied to addresses that carry no "@domain" part. */
public $default_domain = 'localhost';

/** @var boolean True to return groups as nested entries, false to flatten the result. */
public $nestGroups = true;

/** @var boolean Whether atoms are validated; when false _validateAtom() accepts everything. */
public $validate = true;

/** @var array Raw address/group entries collected while splitting the input. */
public $addresses = array();

/** @var array The parsed result that is built up and finally returned. */
public $structure = array();

/** @var string The current error message, or null when no error has occurred. */
public $error = null;

/** @var integer Internal counter/pointer; written by _splitCheck(). */
public $index = null;

/**
 * @var integer Number of groups found in the address list.
 * @access public
 */
public $num_groups = 0;

/** @var boolean Marker tested with isset() by parseAddressList() to detect an object context. */
public $mailRFC822 = true;

/** @var int Maximum number of addresses to split off before processing stops (null = no limit). */
public $limit = null;
/**
 * Sets up the object. The address must either be set here or when
 * calling parseAddressList(). One or the other.
 *
 * Declared as __construct() because a method named after the class is no
 * longer treated as a constructor by PHP 8; with only the old-style
 * constructor, every argument passed to "new Mail_RFC822(...)" was ignored.
 *
 * @access public
 * @param string  $address         The address(es) to validate.
 * @param string  $default_domain  Default domain/host etc. If not supplied, the property default ('localhost') is kept.
 * @param boolean $nest_groups     Whether to return the structure with groups nested for easier viewing.
 * @param boolean $validate        Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
 * @param integer $limit           Optional limit on the number of addresses to process.
 */
function __construct($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    // Only overwrite a property when the caller actually supplied a value.
    if (isset($address)) {
        $this->address = $address;
    }
    if (isset($default_domain)) {
        $this->default_domain = $default_domain;
    }
    if (isset($nest_groups)) {
        $this->nestGroups = $nest_groups;
    }
    if (isset($validate)) {
        $this->validate = $validate;
    }
    if (isset($limit)) {
        $this->limit = $limit;
    }
}

/**
 * Old-style (PHP 4) constructor name, kept so that existing code calling
 * $obj->Mail_RFC822(...) or parent::Mail_RFC822(...) keeps working.
 * It must stay declared after __construct().
 *
 * @access public
 * @param string  $address         The address(es) to validate.
 * @param string  $default_domain  Default domain/host etc.
 * @param boolean $nest_groups     Whether to return the structure with groups nested.
 * @param boolean $validate        Whether to validate atoms.
 * @param integer $limit           Optional limit on the number of addresses to process.
 */
function Mail_RFC822($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    $this->__construct($address, $default_domain, $nest_groups, $validate, $limit);
}
/**
 * Entry point: splits the address list and validates every entry.
 * The address must either be passed here or have been given when the
 * object was created.
 *
 * @access public
 * @param string  $address         The address(es) to validate.
 * @param string  $default_domain  Default domain/host etc.
 * @param boolean $nest_groups     Whether to return the structure with groups nested for easier viewing.
 * @param boolean $validate        Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance.
 * @param integer $limit           Optional limit on the number of addresses to process.
 *
 * @return mixed A structured array of addresses, or false on failure.
 */
function parseAddressList($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null)
{
    // Without an object context, build a parser and delegate to it.
    if (!isset($this->mailRFC822)) {
        $parser = new Mail_RFC822($address, $default_domain, $nest_groups, $validate, $limit);
        return $parser->parseAddressList();
    }

    // Per-call overrides of the configured values.
    if (isset($address)) {
        $this->address = $address;
    }
    if (isset($default_domain)) {
        $this->default_domain = $default_domain;
    }
    if (isset($nest_groups)) {
        $this->nestGroups = $nest_groups;
    }
    if (isset($validate)) {
        $this->validate = $validate;
    }
    if (isset($limit)) {
        $this->limit = $limit;
    }

    // Start from a clean slate.
    $this->index     = null;
    $this->error     = null;
    $this->addresses = array();
    $this->structure = array();

    // Peel one address/group off the front until nothing (truthy) remains.
    do {
        $this->address = $this->_splitAddresses($this->address);
    } while ($this->address);

    if (isset($this->error) || $this->address === false) {
        return false;
    }

    // Reset timer since large amounts of addresses can take a long time
    // to get here.
    set_time_limit(30);

    // Validate every raw entry and collect the results.
    foreach ($this->addresses as $raw_entry) {
        $return = $this->_validateAddress($raw_entry);
        if ($return === false || isset($this->error)) {
            return false;
        }

        if ($this->nestGroups) {
            $this->structure[] = $return;
        } else {
            $this->structure = array_merge($this->structure, $return);
        }
    }

    return $this->structure;
}
/**
 * Splits the first address (or group) off the front of the string and
 * appends it to $this->addresses.
 *
 * @access private
 * @param string $address The addresses to split.
 * @return mixed The remaining, not yet split part of the string ('' when
 *               nothing is left or the limit was reached), or false on error.
 */
function _splitAddresses($address)
{
    // Stop early once the configured number of addresses has been collected.
    if (!empty($this->limit) && count($this->addresses) == $this->limit) {
        return '';
    }

    // Groups end at ';', plain addresses at ','.
    $is_group = $this->_isGroup($address);
    if (isset($this->error)) {
        return false;
    }
    $split_char = $is_group ? ';' : ',';

    // Take everything up to the first separator that is not quoted,
    // bracketed or escaped.
    $string = $this->_splitCheck(explode($split_char, $address), $split_char);

    if ($is_group) {
        // A group needs a colon, and that colon has to sit outside of
        // brackets/quotes.

        // First check there's a colon at all:
        if (strpos($string, ':') === false) {
            $this->error = 'Invalid address: ' . $string;
            return false;
        }

        // Now check it's outside of brackets/quotes:
        if (!$this->_splitCheck(explode(':', $string), ':')) {
            return false;
        }

        // We must have a group at this point, so increase the counter:
        $this->num_groups++;
    }

    // $string now holds the first full address/group; remember it.
    $this->addresses[] = array(
        'address' => trim($string),
        'group'   => $is_group
    );

    // Drop the stored part from the input; the +1 accounts for the
    // separator character itself.
    $address = trim(substr($address, strlen($string) + 1));

    // A group may be followed by a comma before the next entry.
    if ($is_group && substr($address, 0, 1) == ',') {
        $address = trim(substr($address, 1));
    }

    // Whatever is left ('' when done) goes back to the caller.
    return $address;
}
/**
 * Tells whether the string starts with a group ("name: ...").
 *
 * @access private
 * @param string $address The address to check.
 * @return boolean Whether or not there is a group at the start of the string.
 */
function _isGroup($address)
{
    // Isolate the first entry: everything up to the first comma that is
    // not in quotes, angles or escaped.
    $first = $this->_splitCheck(explode(',', $address), ',');

    // No colon at all means it cannot be a group.
    $pieces = explode(':', $first);
    if (count($pieces) <= 1) {
        return false;
    }

    // It is a group when splitting on the colon (again ignoring quoted,
    // bracketed or escaped ones) yields something shorter than the entry.
    return $this->_splitCheck($pieces, ':') !== $first;
}
/**
 * Re-joins the pieces of an exploded string until the result no longer
 * ends inside quotes, brackets or a backslash escape. The index of the
 * last piece consumed is stored in $this->index.
 *
 * @access private
 * @param array  $parts The exploded string.
 * @param string $char  The char that was exploded on.
 * @return mixed False if the string contains unclosed quotes/brackets, or the string on success.
 */
function _splitCheck($parts, $char)
{
    $joined = $parts[0];
    $total  = count($parts);

    for ($pos = 0; $pos < $total; $pos++) {
        // Evaluation order matters: the bracket checks may set $this->error.
        $incomplete = $this->_hasUnclosedQuotes($joined)
            || $this->_hasUnclosedBrackets($joined, '<>')
            || $this->_hasUnclosedBrackets($joined, '[]')
            || $this->_hasUnclosedBrackets($joined, '()')
            || substr($joined, -1) == '\\';

        if (!$incomplete) {
            // Balanced: remember how many pieces were used and stop.
            $this->index = $pos;
            break;
        }

        if (!isset($parts[$pos + 1])) {
            // Ran out of pieces while still inside quotes/brackets.
            $this->error = 'Invalid address spec. Unclosed bracket or quotes';
            return false;
        }

        // The separator was inside quotes/brackets; glue the next piece on.
        $joined = $joined . $char . $parts[$pos + 1];
    }

    return $joined;
}
/**
 * Checks if a string has unclosed double quotes or not.
 *
 * A backslash escapes the character that follows it, so \" does not open
 * or close a quoted section, while \\" does (the backslash itself is the
 * escaped character). The previous implementation only looked at the single
 * character before each quote and therefore misread \\" as an escaped quote.
 *
 * @access private
 * @param string $string The string to check.
 * @return boolean True if there are unclosed quotes inside the string, false otherwise.
 */
function _hasUnclosedQuotes($string)
{
    $string    = (string) $string;
    $length    = strlen($string);
    $in_quotes = false;

    for ($i = 0; $i < $length; $i++) {
        $current = $string[$i];

        if ($current === '\\') {
            // Skip the escaped character, whatever it is.
            $i++;
            continue;
        }

        if ($current === '"') {
            $in_quotes = !$in_quotes;
        }
    }

    return $in_quotes;
}
/**
 * Checks if a string has unclosed brackets or not. The bracket pair to
 * look for is passed in, so the same code serves angle brackets, square
 * brackets and parentheses.
 *
 * Sets $this->error when there are more closing than opening brackets.
 *
 * @access private
 * @param string $string The string to check.
 * @param string $chars  The characters to check for (opening char followed by closing char).
 * @return boolean True if there are unclosed brackets inside the string, false otherwise.
 */
function _hasUnclosedBrackets($string, $chars)
{
    $opener = $chars[0];
    $closer = $chars[1];

    // Raw counts first, then discount escaped/quoted occurrences
    // (the helper adjusts the counter by reference).
    $open_count  = substr_count($string, $opener);
    $close_count = substr_count($string, $closer);

    $this->_hasUnclosedBracketsSub($string, $open_count, $opener);
    $this->_hasUnclosedBracketsSub($string, $close_count, $closer);

    if ($open_count >= $close_count) {
        return $open_count > $close_count;
    }

    // More closers than openers can never be repaired by appending text.
    $this->error = 'Invalid address spec. Unmatched quote or bracket (' . $chars . ')';
    return false;
}
/**
 * Helper used only by _hasUnclosedBrackets(): lowers the occurrence count
 * once for every growing prefix of the string (cut at each $char, plus the
 * full string) that ends in a backslash or has unclosed quotes.
 *
 * @access private
 * @param string  $string The string to check.
 * @param integer &$num   The number of occurrences; adjusted in place.
 * @param string  $char   The character to count.
 * @return integer The adjusted number of occurrences of $char in $string.
 */
function _hasUnclosedBracketsSub($string, &$num, $char)
{
    $prefix = '';

    foreach (explode($char, $string) as $pos => $piece) {
        // Grow the prefix by one more piece on every pass.
        $prefix = ($pos === 0) ? $piece : $prefix . $char . $piece;

        if (substr($prefix, -1) == '\\' || $this->_hasUnclosedQuotes($prefix)) {
            $num--;
        }
    }

    return $num;
}
/**
 * Function to begin checking the address.
 *
 * Takes one raw entry produced by _splitAddresses(), splits a group into
 * its members and validates every mailbox. Unlike the previous
 * array_walk() based version, a mailbox that fails validateMailbox() now
 * makes the whole call fail instead of being returned as a raw string.
 *
 * @access private
 * @param array $address The entry to validate: 'address' (string) and 'group' (boolean).
 * @return mixed False on failure, or a structured array of address information on success.
 */
function _validateAddress($address)
{
    $is_group  = false;
    $addresses = array();

    if ($address['group']) {
        $is_group = true;

        // Get the group part of the name
        $parts     = explode(':', $address['address']);
        $groupname = $this->_splitCheck($parts, ':');
        $structure = array();

        // And validate the group part of the name.
        if (!$this->_validatePhrase($groupname)) {
            $this->error = 'Group name did not validate.';
            return false;
        }

        // Don't include groups if we are not nesting
        // them. This avoids returning invalid addresses.
        if ($this->nestGroups) {
            $structure = new stdClass;
            $structure->groupname = $groupname;
        }

        $address['address'] = ltrim(substr($address['address'], strlen($groupname . ':')));
    }

    // If a group then split on comma and put into an array.
    // Otherwise, just put the whole address in an array.
    if ($is_group) {
        while (strlen($address['address']) > 0) {
            $parts  = explode(',', $address['address']);
            $member = $this->_splitCheck($parts, ',');
            if ($member === false) {
                // Unbalanced quotes/brackets; _splitCheck() has set the error.
                return false;
            }
            $addresses[] = $member;
            $address['address'] = trim(substr($address['address'], strlen($member . ',')));
        }
    } else {
        $addresses[] = $address['address'];
    }

    // An address like "Groupname:;" yields no members at all.
    if (count($addresses) === 0) {
        $this->error = 'Empty group.';
        return false;
    }

    // Validate each mailbox.
    // Format could be one of: name <geezer@domain.com>
    //                         geezer@domain.com
    //                         geezer
    // ... or any other format valid by RFC 822.
    $total = count($addresses);
    for ($i = 0; $i < $total; $i++) {
        $addresses[$i] = trim($addresses[$i]);

        // validateMailbox() replaces the string with an object on success.
        if (!$this->validateMailbox($addresses[$i])) {
            if (empty($this->error)) {
                $this->error = 'Validation failed for: ' . $addresses[$i];
            }
            return false;
        }
    }

    if ($this->nestGroups) {
        // Nested format
        if ($is_group) {
            $structure->addresses = $addresses;
        } else {
            $structure = $addresses[0];
        }
    } else {
        // Flat format
        if ($is_group) {
            $structure = array_merge($structure, $addresses);
        } else {
            $structure = $addresses;
        }
    }

    return $structure;
}
/**
 * Validates a phrase: a run of atoms and/or quoted strings separated by
 * spaces or tabs.
 *
 * @access private
 * @param string $phrase The phrase to check.
 * @return boolean Success or failure.
 */
function _validatePhrase($phrase)
{
    // Splits on one or more Tab or space.
    $tokens = preg_split('/[ \\x09]+/', $phrase, -1, PREG_SPLIT_NO_EMPTY);

    // Re-join tokens that belong to the same quoted/bracketed word;
    // $this->index tells how many tokens the last word consumed.
    $words = array();
    while (count($tokens) > 0) {
        $words[] = $this->_splitCheck($tokens, ' ');
        $tokens  = array_slice($tokens, $this->index + 1);
    }

    foreach ($words as $word) {
        if (substr($word, 0, 1) == '"') {
            // If quoted string:
            if (!$this->_validateQuotedString($word)) {
                return false;
            }
        } elseif (!$this->_validateAtom($word)) {
            // Otherwise it's an atom:
            return false;
        }
    }

    return true;
}
/**
 * Function to validate an atom which from rfc822 is:
 * atom = 1*<any CHAR except specials, SPACE and CTLs>
 *
 * If validation ($this->validate) has been turned off, nothing is
 * checked at all. This is so that you can split a list of addresses up
 * before encoding personal names (umlauts, etc.), for example.
 *
 * @access private
 * @param string $atom The string to check.
 * @return boolean Success or failure.
 */
function _validateAtom($atom)
{
    // Validation has been turned off; assume the atom is okay.
    if (!$this->validate) {
        return true;
    }

    // Must be non-empty and consist only of characters in \x00-\x7E.
    if (!preg_match('/^[\\x00-\\x7E]+$/i', $atom)) {
        return false;
    }

    // Must contain neither one of the listed specials (or space)
    // nor a control character (ASCII 0-31).
    return !preg_match('/[][()<>@,;\\:". ]/', $atom)
        && !preg_match('/[\\x00-\\x1F]+/', $atom);
}
/**
 * Function to validate quoted string, which is:
 * quoted-string = <"> *(qtext/quoted-pair) <">
 *
 * @access private
 * @param string $qstring The string to check, including its surrounding quotes.
 * @return boolean Success or failure.
 */
function _validateQuotedString($qstring)
{
    // Strip the leading and trailing "
    $inner = substr($qstring, 1, -1);

    // No CR, backslash or quote following another character: fine.
    if (!preg_match('/(.)[\x0D\\\\"]/', $inner, $hit)) {
        return true;
    }

    // Otherwise the first such character is only acceptable when the
    // character before it is a backslash.
    return $hit[1] == '\\';
}
/**
 * Function to validate a mailbox, which is:
 * mailbox = addr-spec          ; simple address
 *         / phrase route-addr  ; name and route-addr
 *
 * On success the string passed in is replaced by a stdClass object with
 * the properties personal, comment, mailbox, host and (only when a route
 * is present) adl.
 *
 * $comments is now initialised up front: counting the undefined variable
 * raised a TypeError on PHP 8 for every mailbox without a comment.
 *
 * @access public
 * @param string &$mailbox The string to check; replaced by the parsed object on success.
 * @return boolean Success or failure.
 */
function validateMailbox(&$mailbox)
{
    // A couple of defaults.
    $phrase   = '';
    $comment  = '';
    $comments = array();

    // Catch any RFC822 comments and store them separately
    $_mailbox = $mailbox;
    while (strlen(trim($_mailbox)) > 0) {
        $parts = explode('(', $_mailbox);
        $before_comment = $this->_splitCheck($parts, '(');
        if ($before_comment != $_mailbox) {
            // First char should be a (
            $comment    = substr(str_replace($before_comment, '', $_mailbox), 1);
            $parts      = explode(')', $comment);
            $comment    = $this->_splitCheck($parts, ')');
            $comments[] = $comment;

            // +1 is for the trailing )
            $_mailbox = substr($_mailbox, strpos($_mailbox, $comment) + strlen($comment) + 1);
        } else {
            break;
        }
    }

    // Remove every comment found from the mailbox itself.
    foreach ($comments as $found) {
        $mailbox = str_replace('(' . $found . ')', '', $mailbox);
    }
    $mailbox = trim($mailbox);

    // Check for name + route-addr
    if (substr($mailbox, -1) == '>' && substr($mailbox, 0, 1) != '<') {
        $parts = explode('<', $mailbox);
        $name  = $this->_splitCheck($parts, '<');

        $phrase     = trim($name);
        $route_addr = trim(substr($mailbox, strlen($name . '<'), -1));

        if ($this->_validatePhrase($phrase) === false
            || ($route_addr = $this->_validateRouteAddr($route_addr)) === false) {
            return false;
        }

    // Only got addr-spec
    } else {
        // First snip angle brackets if present.
        if (substr($mailbox, 0, 1) == '<' && substr($mailbox, -1) == '>') {
            $addr_spec = substr($mailbox, 1, -1);
        } else {
            $addr_spec = $mailbox;
        }

        if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) {
            return false;
        }
    }

    // Construct the object that will be returned.
    $mbox = new stdClass();

    // Add the phrase (even if empty) and comments
    $mbox->personal = $phrase;
    $mbox->comment  = $comments;

    // $route_addr only exists when the name + route-addr branch ran.
    if (isset($route_addr)) {
        $mbox->mailbox = $route_addr['local_part'];
        $mbox->host    = $route_addr['domain'];
        if ($route_addr['adl'] !== '') {
            $mbox->adl = $route_addr['adl'];
        }
    } else {
        $mbox->mailbox = $addr_spec['local_part'];
        $mbox->host    = $addr_spec['domain'];
    }

    $mailbox = $mbox;
    return true;
}
/**
 * This function validates a route-addr which is:
 * route-addr = "<" [route] addr-spec ">"
 *
 * Angle brackets have already been removed at the point of
 * getting to this function.
 *
 * @access private
 * @param string $route_addr The string to check.
 * @return mixed False on failure, or an array with the keys 'adl',
 *               'local_part' and 'domain' on success.
 */
function _validateRouteAddr($route_addr)
{
    // Take the part before the first colon that is not quoted/bracketed.
    $route = $route_addr;
    if (strpos($route_addr, ':') !== false) {
        $route = $this->_splitCheck(explode(':', $route_addr), ':');
    }

    if ($route === $route_addr) {
        // The colon was in quotes or brackets or, of course, non existent:
        // there is no route, the whole thing is the addr-spec.
        $adl       = '';
        $spec_text = $route_addr;
    } else {
        // Validate route part.
        $route = $this->_validateRoute($route);
        if ($route === false) {
            return false;
        }

        $adl       = $route;
        $spec_text = substr($route_addr, strlen($route . ':'));
    }

    // Validate addr-spec part.
    $addr_spec = $this->_validateAddrSpec($spec_text);
    if ($addr_spec === false) {
        return false;
    }

    return array_merge(array('adl' => $adl), $addr_spec);
}
/**
 * Function to validate a route, which is:
 * route = 1#("@" domain) ":"
 *
 * @access private
 * @param string $route The string to check.
 * @return mixed False on failure, or the validated $route on success.
 */
function _validateRoute($route)
{
    // Split on comma; every entry is a domain once its "@" is removed.
    foreach (explode(',', trim($route)) as $hop) {
        $hop_domain = str_replace('@', '', trim($hop));
        if (!$this->_validateDomain($hop_domain)) {
            return false;
        }
    }

    return $route;
}
/**
 * Function to validate a domain, though this is not quite what
 * you expect of a strict internet domain.
 *
 * domain = sub-domain *("." sub-domain)
 *
 * @access private
 * @param string $domain The string to check.
 * @return mixed False on failure, or the validated domain on success.
 */
function _validateDomain($domain)
{
    $labels      = explode('.', $domain);
    $sub_domains = array();

    // Re-join labels that were split inside brackets/quotes;
    // $this->index tells how many labels the last sub-domain consumed.
    while (count($labels) > 0) {
        $sub_domains[] = $this->_splitCheck($labels, '.');
        $labels        = array_slice($labels, $this->index + 1);
    }

    foreach ($sub_domains as $sub_domain) {
        if (!$this->_validateSubdomain(trim($sub_domain))) {
            return false;
        }
    }

    // Managed to get here, so return input.
    return $domain;
}
/**
 * Function to validate a subdomain:
 * subdomain = domain-ref / domain-literal
 *
 * @access private
 * @param string $subdomain The string to check.
 * @return boolean Success or failure.
 */
function _validateSubdomain($subdomain)
{
    // "[...]" is a domain literal, anything else has to be an atom.
    $is_literal = preg_match('|^\[(.*)]$|', $subdomain, $literal);

    $valid = $is_literal
        ? $this->_validateDliteral($literal[1])
        : $this->_validateAtom($subdomain);

    return $valid ? true : false;
}
/**
 * Function to validate a domain literal:
 * domain-literal = "[" *(dtext / quoted-pair) "]"
 *
 * The surrounding brackets have already been removed by the caller.
 * The check mirrors _validateQuotedString(): a bracket, CR or backslash
 * that follows another character is only accepted when that character
 * is a backslash.
 *
 * The previous expression negated only the preg_match() call, so it read
 * $matches[1] exactly when there was no match (undefined index) and
 * rejected every match, escaped or not.
 *
 * @access private
 * @param string $dliteral The string to check.
 * @return boolean Success or failure.
 */
function _validateDliteral($dliteral)
{
    // Nothing suspicious found: valid.
    if (!preg_match('/(.)[][\x0D\\\\]/', $dliteral, $matches)) {
        return true;
    }

    // The first suspicious character must be escaped.
    return $matches[1] == '\\';
}
/**
 * Function to validate an addr-spec.
 *
 * addr-spec = local-part "@" domain
 *
 * @access private
 * @param string $addr_spec The string to check.
 * @return mixed False on failure, or an array with the keys 'local_part'
 *               and 'domain' on success.
 */
function _validateAddrSpec($addr_spec)
{
    $addr_spec = trim($addr_spec);

    if (strpos($addr_spec, '@') === false) {
        // No @ sign so assume the default domain.
        $local_part = $addr_spec;
        $domain     = $this->default_domain;
    } else {
        // Split on the first @ that is not quoted, bracketed or escaped.
        $local_part = $this->_splitCheck(explode('@', $addr_spec), '@');
        $domain     = substr($addr_spec, strlen($local_part . '@'));
    }

    $local_part = $this->_validateLocalPart($local_part);
    if ($local_part === false) {
        return false;
    }

    $domain = $this->_validateDomain($domain);
    if ($domain === false) {
        return false;
    }

    // Got here so return successful.
    return array('local_part' => $local_part, 'domain' => $domain);
}
/**
 * Function to validate the local part of an address:
 * local-part = word *("." word)
 *
 * @access private
 * @param string $local_part The local part to check.
 * @return mixed False on failure, or the validated local part on success.
 */
function _validateLocalPart($local_part)
{
    $pieces = explode('.', $local_part);
    $words  = array();

    // Split the local_part into words, re-joining pieces that were cut
    // inside quotes/brackets; $this->index tells how many were consumed.
    while (count($pieces) > 0) {
        $words[] = $this->_splitCheck($pieces, '.');
        $pieces  = array_slice($pieces, $this->index + 1);
    }

    // Validate each word.
    foreach ($words as $word) {
        if ($this->_validatePhrase(trim($word)) === false) {
            return false;
        }
    }

    // Managed to get here, so return the input.
    return $local_part;
}
/**
 * Returns an approximate count of how many addresses are in the given
 * string. This is APPROXIMATE as it only splits on commas that have no
 * preceding backslash. Could be useful as large amounts of addresses
 * will end up producing *large* structures when used with
 * parseAddressList().
 *
 * @param string $data Addresses to count
 * @return int Approximate count
 */
function approximateCount($data)
{
    // Every comma that is not backslash-escaped separates two addresses.
    $chunks = preg_split('/(?<!\\\\),/', $data);

    return count($chunks);
}
/**
 * This is an email validating function separate to the rest of the
 * class. It simply validates whether an email is of the common
 * internet form: <user>@<domain>. This can be sufficient for most
 * people. Optional stricter mode can be utilised which restricts
 * mailbox characters allowed to alphanumeric, full stop, hyphen and
 * underscore.
 *
 * The last domain label may be 2 to 63 alphanumeric characters or a
 * punycode label ("xn--..."); the former 2-4 character limit rejected
 * valid addresses such as user@example.museum.
 *
 * @param string  $data   Address to check
 * @param boolean $strict Optional stricter mode
 * @return mixed False if it fails, an indexed array
 *               username/domain if it matches
 */
function isValidInetAddress($data, $strict = false)
{
    // Characters allowed in the mailbox part.
    $local = $strict
        ? '[.0-9a-z_-]+'
        : '[*+!.&#$|\'\\%\/0-9a-z^_`{}=?~:-]+';

    // One or more dot-terminated labels followed by the top-level label.
    $domain = '(([0-9a-z-]+\.)+(?:xn--[0-9a-z-]{1,59}|[0-9a-z]{2,63}))';

    $regex = '/^(' . $local . ')@' . $domain . '$/i';

    if (preg_match($regex, trim($data), $matches)) {
        return array($matches[1], $matches[2]);
    }

    return false;
}
867 return false;