Merge "Special:Upload should not crash on failing previews"
[mediawiki.git] / includes / compat / normal / UtfNormal.php
blobc9c05a07592b24364becc6888007a0b5a52837cb
1 <?php
2 /**
3 * Unicode normalization routines
5 * Copyright © 2004 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
23 * @file
24 * @ingroup UtfNormal
27 /**
28 * @defgroup UtfNormal UtfNormal
31 use UtfNormal\Validator;
33 /**
34 * Unicode normalization routines for working with UTF-8 strings.
35 * Currently assumes that input strings are valid UTF-8!
37 * Not as fast as I'd like, but should be usable for most purposes.
38 * UtfNormal::toNFC() will bail early if given ASCII text or text
39 * it can quickly determine is already normalized.
41 * All functions can be called statically.
43 * See description of forms at http://www.unicode.org/reports/tr15/
45 * @deprecated since 1.25, use UtfNormal\Validator directly
46 * @ingroup UtfNormal
48 class UtfNormal {
/**
 * The ultimate convenience function! Clean up invalid UTF-8 sequences,
 * and convert to normal form C, canonical composition.
 *
 * Fast return for pure ASCII strings; some lesser optimizations for
 * strings containing only known-good characters. Not as fast as toNFC().
 *
 * This method is a thin wrapper: all work is delegated to
 * Validator::cleanUp().
 *
 * @param string $string a UTF-8 string
 * @return string a clean, shiny, normalized UTF-8 string
 */
public static function cleanUp( $string ) {
	return Validator::cleanUp( $string );
}
/**
 * Convert a UTF-8 string to normal form C, canonical composition.
 * Fast return for pure ASCII strings; some lesser optimizations for
 * strings containing only known-good characters.
 *
 * This method is a thin wrapper: all work is delegated to
 * Validator::toNFC().
 *
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form C
 */
public static function toNFC( $string ) {
	return Validator::toNFC( $string );
}
/**
 * Convert a UTF-8 string to normal form D, canonical decomposition.
 * Fast return for pure ASCII strings.
 *
 * This method is a thin wrapper: all work is delegated to
 * Validator::toNFD().
 *
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form D
 */
public static function toNFD( $string ) {
	return Validator::toNFD( $string );
}
/**
 * Convert a UTF-8 string to normal form KC, compatibility composition.
 * This may cause irreversible information loss, use judiciously.
 * Fast return for pure ASCII strings.
 *
 * This method is a thin wrapper: all work is delegated to
 * Validator::toNFKC().
 *
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form KC
 */
public static function toNFKC( $string ) {
	return Validator::toNFKC( $string );
}
/**
 * Convert a UTF-8 string to normal form KD, compatibility decomposition.
 * This may cause irreversible information loss, use judiciously.
 * Fast return for pure ASCII strings.
 *
 * This method is a thin wrapper: all work is delegated to
 * Validator::toNFKD().
 *
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return string a UTF-8 string in normal form KD
 */
public static function toNFKD( $string ) {
	return Validator::toNFKD( $string );
}
/**
 * Returns true if the string is _definitely_ in NFC.
 * Returns false if not or uncertain.
 *
 * This method is a thin wrapper: the check itself is delegated to
 * Validator::quickIsNFC().
 *
 * @param string $string a valid UTF-8 string. Input is not validated.
 * @return bool
 */
public static function quickIsNFC( $string ) {
	return Validator::quickIsNFC( $string );
}
/**
 * Returns true if the string is _definitely_ in NFC.
 * Returns false if not or uncertain.
 *
 * This method is a thin wrapper: the check itself is delegated to
 * Validator::quickIsNFCVerify(). The argument is taken by reference and
 * handed straight on, so any change the validator makes to it is visible
 * to the caller.
 *
 * @param string &$string a UTF-8 string, altered on output to be valid UTF-8 safe for XML.
 * @return bool
 */
public static function quickIsNFCVerify( &$string ) {
	return Validator::quickIsNFCVerify( $string );
}