languages/classes/LanguageEo.php

   1 <?php
   2 /**
   3  * Esperanto (Esperanto) specific code.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @author Brion Vibber <brion@pobox.com>
  22  * @ingroup Language
  23  */
  24
  25 /**
  26  * Esperanto (Esperanto)
  27  *
  28  * @ingroup Language
  29  */
  30 class LanguageEo extends Language {
  31         /**
  32          * Wrapper for charset conversions.
  33          *
  34          * In most languages, this calls through to standard system iconv(), but
  35          * for Esperanto we're also adding a special pseudo-charset to convert
  36          * accented characters to/from the ASCII-friendly "X" surrogate coding:
  37          *
  38          *     cx = ĉ     cxx = cx
  39          *     gx = ĝ     gxx = gx
  40          *     hx = ĥ     hxx = hx
  41          *     jx = ĵ     jxx = jx
  42          *     sx = ŝ     sxx = sx
  43          *     ux = ŭ     uxx = ux
  44          *     xx = x
  45          *
  46          *   http://en.wikipedia.org/wiki/Esperanto_orthography#X-system
  47          *   http://eo.wikipedia.org/wiki/X-sistemo
  48          *
  49          * X-conversion is applied, in either direction, between "utf-8" and "x" charsets;
  50          * this comes into effect when input is run through $wgRequest->getText() and the
  51          * $wgEditEncoding is set to 'x'.
  52          *
  53          * In the long run, this should be moved out of here and into the client-side
  54          * editor behavior; the original server-side translation system dates to 2002-2003
  55          * when many browsers with really bad Unicode support were still in use.
  56          *
  57          * @param string $in Input character set
  58          * @param string $out Output character set
  59          * @param string $string Text to be converted
  60          * @return string
  61          */
  62         function iconv( $in, $out, $string ) {
  63                 if ( strcasecmp( $in, 'x' ) == 0 && strcasecmp( $out, 'utf-8' ) == 0 ) {
  64                         return preg_replace_callback (
  65                                 '/([cghjsu]x?)((?:xx)*)(?!x)/i',
  66                                 array( $this, 'strrtxuCallback' ), $string );
  67                 } elseif ( strcasecmp( $in, 'UTF-8' ) == 0 && strcasecmp( $out, 'x' ) == 0 ) {
  68                         # Double Xs only if they follow cxapelutaj literoj.
  69                         return preg_replace_callback(
  70                                 '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i',
  71                                 array( $this, 'strrtuxCallback' ), $string );
  72                 }
  73                 return parent::iconv( $in, $out, $string );
  74         }
  75
  76         /**
  77          * @param array $matches
  78          * @return string
  79          */
  80         function strrtuxCallback( $matches ) {
  81                 static $ux = array(
  82                         'x' => 'xx', 'X' => 'Xx',
  83                         "\xc4\x88" => "Cx", "\xc4\x89" => "cx",
  84                         "\xc4\x9c" => "Gx", "\xc4\x9d" => "gx",
  85                         "\xc4\xa4" => "Hx", "\xc4\xa5" => "hx",
  86                         "\xc4\xb4" => "Jx", "\xc4\xb5" => "jx",
  87                         "\xc5\x9c" => "Sx", "\xc5\x9d" => "sx",
  88                         "\xc5\xac" => "Ux", "\xc5\xad" => "ux",
  89                 );
  90                 return strtr( $matches[1], $ux );
  91         }
  92
  93         /**
  94          * @param array $matches
  95          * @return string
  96          */
  97         function strrtxuCallback( $matches ) {
  98                 static $xu = array(
  99                         'xx' => 'x', 'xX' => 'x',
 100                         'Xx' => 'X', 'XX' => 'X',
 101                         "Cx" => "\xc4\x88", "CX" => "\xc4\x88",
 102                         "cx" => "\xc4\x89", "cX" => "\xc4\x89",
 103                         "Gx" => "\xc4\x9c", "GX" => "\xc4\x9c",
 104                         "gx" => "\xc4\x9d", "gX" => "\xc4\x9d",
 105                         "Hx" => "\xc4\xa4", "HX" => "\xc4\xa4",
 106                         "hx" => "\xc4\xa5", "hX" => "\xc4\xa5",
 107                         "Jx" => "\xc4\xb4", "JX" => "\xc4\xb4",
 108                         "jx" => "\xc4\xb5", "jX" => "\xc4\xb5",
 109                         "Sx" => "\xc5\x9c", "SX" => "\xc5\x9c",
 110                         "sx" => "\xc5\x9d", "sX" => "\xc5\x9d",
 111                         "Ux" => "\xc5\xac", "UX" => "\xc5\xac",
 112                         "ux" => "\xc5\xad", "uX" => "\xc5\xad",
 113                 );
 114                 return strtr( $matches[1], $xu ) . strtr( $matches[2], $xu );
 115         }
 116
 117         function initEncoding() {
 118                 global $wgEditEncoding;
 119                 $wgEditEncoding = 'x';
 120         }
 121 }