Non-word characters don't terminate tag names.
[mediawiki.git] / includes / ZhClient.php
blobc5955aeca7a525e259a0eb652d8f48d630f5bfc3
<?php
/**
 * Client for querying zhdaemon.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
/**
 * Client for querying zhdaemon.
 *
 * Opens a TCP connection to the daemon on construction and exposes helpers
 * that send "CONV" / "SEG" requests and read back the length-prefixed reply.
 */
class ZhClient {
	/**
	 * Daemon host, daemon port, socket handle (false when the connection
	 * failed or was closed) and the connection state flag.
	 */
	public $mHost, $mPort, $mFP, $mConnected;

	/**
	 * Constructor. Stores the endpoint and immediately tries to connect.
	 *
	 * @param $host
	 * @param $port
	 *
	 * @return ZhClient
	 */
	public function __construct( $host, $port ) {
		$this->mHost = $host;
		$this->mPort = $port;
		$this->mConnected = $this->connect();
	}

	/**
	 * Check if connection to zhdaemon is successful
	 *
	 * @return bool
	 */
	public function isconnected() {
		return $this->mConnected;
	}

	/**
	 * Establish connection
	 *
	 * @access private
	 *
	 * @return bool true when the socket was opened, false otherwise
	 */
	public function connect() {
		wfSuppressWarnings();
		$errno = $errstr = '';
		$this->mFP = fsockopen( $this->mHost, $this->mPort, $errno, $errstr, 30 );
		wfRestoreWarnings();
		// fsockopen() yields false on failure, so a truthy handle means success.
		return (bool)$this->mFP;
	}

	/**
	 * Query the daemon and return the result
	 *
	 * The reply starts with a header line "<status> <length>"; for an
	 * "ERROR" status the second field is an error code instead of a length.
	 *
	 * @access private
	 *
	 * @param string $request raw request to send
	 * @return string|bool the payload, or false on any failure
	 */
	public function query( $request ) {
		if ( !$this->mConnected ) {
			return false;
		}

		fwrite( $this->mFP, $request );

		$result = fgets( $this->mFP, 1024 );
		if ( $result === false ) {
			// Nothing could be read: connection dropped or timed out.
			return false;
		}

		// Pad so that a header without a space does not raise a notice.
		list( $status, $len ) = array_pad( explode( ' ', $result ), 2, '' );
		if ( $status == 'ERROR' ) {
			// $len is actually the error code...
			print "zhdaemon error $len<br />\n";
			return false;
		}

		// The header field still carries the line terminator; the cast drops it.
		$expected = (int)$len;
		$bytesread = 0;
		$data = '';
		while ( !feof( $this->mFP ) && $bytesread < $expected ) {
			$str = fread( $this->mFP, $expected - $bytesread );
			if ( $str === false || $str === '' ) {
				// No progress is possible; stop instead of spinning.
				break;
			}
			$bytesread += strlen( $str );
			$data .= $str;
		}

		// data should be of length $len. otherwise something is wrong
		if ( strlen( $data ) != $expected ) {
			return false;
		}
		return $data;
	}

	/**
	 * Convert the input to a different language variant
	 *
	 * @param string $text input text
	 * @param string $tolang language variant
	 * @return string the converted text, or $text unchanged when the query fails
	 */
	public function convert( $text, $tolang ) {
		$len = strlen( $text );
		$q = "CONV $tolang $len\n$text";
		$result = $this->query( $q );
		if ( !$result ) {
			$result = $text;
		}
		return $result;
	}

	/**
	 * Convert the input to all possible variants
	 *
	 * The payload has the form "<code> <len>;<code> <len>;...|<data>", where
	 * <data> is the concatenation of every converted string in order.
	 *
	 * @param string $text input text
	 * @return array|bool langcode => converted_string, or false on failure
	 */
	public function convertToAllVariants( $text ) {
		$len = strlen( $text );
		$q = "CONV ALL $len\n$text";
		$result = $this->query( $q );
		if ( !$result ) {
			return false;
		}

		$parts = explode( '|', $result, 2 );
		if ( count( $parts ) < 2 ) {
			// Malformed reply: the info line / data separator is missing.
			return false;
		}
		list( $infoline, $data ) = $parts;

		$ret = array();
		$offset = 0;
		foreach ( explode( ';', $infoline ) as $variant ) {
			$fields = explode( ' ', $variant );
			if ( count( $fields ) < 2 ) {
				// Skip entries that do not carry both a code and a length.
				continue;
			}
			$size = (int)$fields[1];
			$ret[strtolower( $fields[0] )] = substr( $data, $offset, $size );
			$offset += $size;
		}
		return $ret;
	}

	/**
	 * Perform word segmentation
	 *
	 * When the daemon cannot be queried the text is segmented per character:
	 * every UTF-8 character is separated by a single space. If the text is
	 * empty or not valid UTF-8 it is returned unchanged.
	 *
	 * @param string $text input text
	 * @return string segmented text
	 */
	public function segment( $text ) {
		$len = strlen( $text );
		$q = "SEG $len\n$text";
		$result = $this->query( $q );
		if ( !$result ) { // fallback to character based segmentation
			$chars = preg_split( '//u', $text, -1, PREG_SPLIT_NO_EMPTY );
			$result = $chars ? implode( ' ', $chars ) : $text;
		}
		return $result;
	}

	/**
	 * Close the connection
	 */
	public function close() {
		// Only close a handle that was actually opened.
		if ( is_resource( $this->mFP ) ) {
			fclose( $this->mFP );
		}
		$this->mFP = false;
		$this->mConnected = false;
	}
}