3 * Esperanto (Esperanto) specific code.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @author Brion Vibber <brion@pobox.com>
26 * Esperanto (Esperanto)
30 class LanguageEo
extends Language
{
32 * Wrapper for charset conversions.
34 * In most languages, this calls through to standard system iconv(), but
35 * for Esperanto we're also adding a special pseudo-charset to convert
36 * accented characters to/from the ASCII-friendly "X" surrogate coding:
46 * http://en.wikipedia.org/wiki/Esperanto_orthography#X-system
47 * http://eo.wikipedia.org/wiki/X-sistemo
49 * X-conversion is applied, in either direction, between "utf-8" and "x" charsets;
50 * this comes into effect when input is run through $wgRequest->getText() and the
51 * $wgEditEncoding is set to 'x'.
53 * In the long run, this should be moved out of here and into the client-side
54 * editor behavior; the original server-side translation system dates to 2002-2003
55 * when many browsers with really bad Unicode support were still in use.
57 * @param string $in input character set
58 * @param string $out output character set
59 * @param string $string text to be converted
/**
 * Charset conversion with support for the Esperanto "x" pseudo-charset.
 *
 * Two charset pairs are handled locally, with the charset names compared
 * case-insensitively: "x" to "utf-8" and "utf-8" to "x". Any other pair
 * is passed straight to the parent implementation.
 *
 * @param string $in input character set
 * @param string $out output character set
 * @param string $string text to be converted
 * @return mixed the preg_replace_callback() result for the two X-system
 *  pairs, otherwise whatever the parent implementation returns
 */
function iconv( $in, $out, $string ) {
	// X-system -> UTF-8. A match is one of c/g/h/j/s/u with an optional x
	// (group 1), then any number of "xx" pairs (group 2), and it must not
	// be followed by yet another x.
	if ( strcasecmp( $in, 'x' ) === 0 && strcasecmp( $out, 'utf-8' ) === 0 ) {
		$xSequence = '/([cghjsu]x?)((?:xx)*)(?!x)/i';
		$decoder = array( $this, 'strrtxuCallback' );
		return preg_replace_callback( $xSequence, $decoder, $string );
	}

	// UTF-8 -> X-system. A match is a plain c/g/h/j/s/u or one of the
	// two-byte accented letters, together with the run of x's after it,
	// so x's are only doubled when they follow one of those letters.
	if ( strcasecmp( $in, 'UTF-8' ) === 0 && strcasecmp( $out, 'x' ) === 0 ) {
		$letterWithXs = '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i';
		$encoder = array( $this, 'strrtuxCallback' );
		return preg_replace_callback( $letterWithXs, $encoder, $string );
	}

	// Every other charset pair is the parent's job.
	return parent::iconv( $in, $out, $string );
}
77 * @param $matches array
/**
 * preg_replace_callback() handler for the UTF-8 to X-system direction.
 *
 * Only capture group 1 of the match is read.
 *
 * @param $matches array
 * @return string group 1 with each x doubled and each accented letter
 *  written as its base letter followed by "x"
 */
function strrtuxCallback( $matches ) {
	static $xFormOf = array(
		// A literal x is escaped by adding a second, lower-case x.
		'x' => 'xx',
		'X' => 'Xx',
		// C with circumflex, upper then lower case.
		"\xc4\x88" => 'Cx',
		"\xc4\x89" => 'cx',
		// G with circumflex.
		"\xc4\x9c" => 'Gx',
		"\xc4\x9d" => 'gx',
		// H with circumflex.
		"\xc4\xa4" => 'Hx',
		"\xc4\xa5" => 'hx',
		// J with circumflex.
		"\xc4\xb4" => 'Jx',
		"\xc4\xb5" => 'jx',
		// S with circumflex.
		"\xc5\x9c" => 'Sx',
		"\xc5\x9d" => 'sx',
		// U with breve.
		"\xc5\xac" => 'Ux',
		"\xc5\xad" => 'ux',
	);

	$captured = $matches[1];
	return strtr( $captured, $xFormOf );
}
94 * @param $matches array
/**
 * preg_replace_callback() handler for the X-system to UTF-8 direction.
 *
 * Capture groups 1 and 2 of the match are translated separately with the
 * same table and then joined.
 *
 * @param $matches array
 * @return string the converted text for this match
 */
function strrtxuCallback( $matches ) {
	static $utf8FormOf = array(
		// A doubled x collapses to one x; the case of the first x wins.
		'xx' => 'x',
		'xX' => 'x',
		'Xx' => 'X',
		'XX' => 'X',
		// Base letter plus x (either case) becomes C/c with circumflex.
		'Cx' => "\xc4\x88",
		'CX' => "\xc4\x88",
		'cx' => "\xc4\x89",
		'cX' => "\xc4\x89",
		// G/g with circumflex.
		'Gx' => "\xc4\x9c",
		'GX' => "\xc4\x9c",
		'gx' => "\xc4\x9d",
		'gX' => "\xc4\x9d",
		// H/h with circumflex.
		'Hx' => "\xc4\xa4",
		'HX' => "\xc4\xa4",
		'hx' => "\xc4\xa5",
		'hX' => "\xc4\xa5",
		// J/j with circumflex.
		'Jx' => "\xc4\xb4",
		'JX' => "\xc4\xb4",
		'jx' => "\xc4\xb5",
		'jX' => "\xc4\xb5",
		// S/s with circumflex.
		'Sx' => "\xc5\x9c",
		'SX' => "\xc5\x9c",
		'sx' => "\xc5\x9d",
		'sX' => "\xc5\x9d",
		// U/u with breve.
		'Ux' => "\xc5\xac",
		'UX' => "\xc5\xac",
		'ux' => "\xc5\xad",
		'uX' => "\xc5\xad",
	);

	$letterPart = strtr( $matches[1], $utf8FormOf );
	$trailingXs = strtr( $matches[2], $utf8FormOf );

	return $letterPart . $trailingXs;
}
/**
 * Inspect the byte encoding of a title string and, when it contains high
 * bytes that do not form well-formed UTF-8 sequences, re-encode it with
 * utf8_encode().
 *
 * NOTE(review): this listing is missing lines -- the body of the elseif
 * branch and the end of the function are not visible here. Confirm the
 * return value against the complete file.
 *
 * @param $s string title text to inspect
 */
121 function checkTitleEncoding( $s ) {
122 # Check for X-system backwards-compatibility URLs
// Non-zero when $s contains at least one byte in the range 0x80-0xff.
123 $ishigh = preg_match( '/[\x80-\xff]/', $s );
// Non-zero when all of $s consists of single bytes 0x00-0x7f and of lead
// bytes followed by one, two or three continuation bytes (0x80-0xbf).
124 $isutf = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
125 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
// High bytes that are not valid UTF-8: utf8_encode() converts the string
// from ISO-8859-1 to UTF-8.
127 if ( $ishigh and !$isutf ) {
129 $s = utf8_encode( $s );
// Otherwise look for the UTF-8 byte sequences of the accented Esperanto
// letters (the same sequences the conversion callbacks translate).
130 } elseif ( preg_match( '/(\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]' .
131 '|\xc5[\x9c\x9d\xac\xad])/', $s )
// Disabled code: it would decode X-system sequences found in the title.
136 // if( preg_match( '/[cghjsu]x/i', $s ) )
137 // return $this->iconv( 'x', 'utf-8', $s );
/**
 * Select the "x" pseudo-charset as the edit encoding by assigning 'x' to
 * the global $wgEditEncoding.
 */
141 function initEncoding() {
// Bind the global; without this the assignment would only create a local.
142 global $wgEditEncoding;
143 $wgEditEncoding = 'x';