Don't display multiple language links to the same language
[mediawiki.git] / includes / parser / Parser_LinkHooks.php
blob6bcc324d58a024b6ee8b813ba3f79f67b89eeec8
1 <?php
2 /**
3 * Modified version of the PHP parser with hooks for wiki links; experimental
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Parser
24 /**
25 * Parser with LinkHooks experiment
26 * @ingroup Parser
28 class Parser_LinkHooks extends Parser {
29 /**
30 * Update this version number when the ParserOutput format
31 * changes in an incompatible way, so the parser cache
32 * can automatically discard old data.
34 const VERSION = '1.6.4';
36 # Flags for Parser::setLinkHook
37 # Also available as global constants from Defines.php
38 const SLH_PATTERN = 1;
40 # Constants needed for external link processing
41 # Everything except bracket, space, or control characters
42 const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
43 const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
44 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';
46 /**#@+
47 * @private
49 # Persistent:
50 var $mLinkHooks;
52 /**#@-*/
/**
 * Build the parser and start out with an empty link hook registry.
 *
 * @param $conf Array: configuration, handed unchanged to the parent constructor
 */
public function __construct( $conf = array() ) {
	parent::__construct( $conf );

	# Nothing is registered until setLinkHook() is called
	$this->mLinkHooks = array();
}
/**
 * One-time initialisation, performed the first time the parser is used.
 *
 * While $this->mFirstCall is set, this registers the 'pre' tag hook, the
 * core parser functions and the core link functions, initialises the
 * variables and finally runs the 'ParserFirstCallInit' hook. When the flag
 * is not set nothing beyond the parent constructor call happens.
 */
function firstCallInit() {
	# NOTE(review): the parent constructor is re-run (without $conf) on every
	# call, before the first-call flag is even looked at. Its effect on
	# mFirstCall and on already registered hooks is not visible from this
	# file -- confirm that this is intended.
	parent::__construct();

	if ( $this->mFirstCall ) {
		$this->mFirstCall = false;

		wfProfileIn( __METHOD__ );

		$preTagHandler = array( $this, 'renderPreTag' );
		$this->setHook( 'pre', $preTagHandler );
		CoreParserFunctions::register( $this );
		CoreLinkFunctions::register( $this );
		$this->initialiseVariables();

		wfRunHooks( 'ParserFirstCallInit', array( &$this ) );
		wfProfileOut( __METHOD__ );
	}
}
/**
 * Create a link hook, e.g. [[Namespace:...|display]]
 * The callback function should have the form:
 *    function myLinkCallback( $parser, $holders, $markers,
 *      Title $title, $titleText, &$sortText = null, &$leadingColon = false ) { ... }
 *
 * Or with SLH_PATTERN:
 *    function myLinkCallback( $parser, $holders, $markers,
 *      $titleText, &$sortText = null, &$leadingColon = false ) { ... }
 *
 * The callback may return one of the following values:
 *    String) Text result of the link
 *    True) (Treat as link) Parse the link according to normal link rules
 *    False) (Bad link) Just output the raw wikitext (You may modify the text first)
 *
 * @param $ns Integer or String: the Namespace ID or regex pattern if SLH_PATTERN is set
 * @param $callback Mixed: the callback function (and object) to use
 * @param $flags Integer: a combination of the following flags:
 *     SLH_PATTERN   Use a regex link pattern rather than a namespace
 *
 * @return callback|null The old callback function for this name, if any
 * @throws MWException when the type of $ns does not match the flags
 */
public function setLinkHook( $ns, $callback, $flags = 0 ) {
	# Test the flag bit explicitly. The previous check,
	# "$flags | ~SLH_PATTERN && !is_int($ns)", was parsed as
	# "($flags | ~SLH_PATTERN) && ..." whose left side is never zero, so
	# every pattern hook with a valid string pattern was rejected as
	# "expecting a namespace index".
	$isPattern = (bool)( $flags & self::SLH_PATTERN );

	if ( $isPattern && !is_string( $ns ) ) {
		throw new MWException( __METHOD__.'() expecting a regex string pattern.' );
	} elseif ( !$isPattern && !is_int( $ns ) ) {
		throw new MWException( __METHOD__.'() expecting a namespace index.' );
	}

	# Each registry entry is array( callback, flags ); hand back only the
	# callback that is being replaced
	$oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null;
	$this->mLinkHooks[$ns] = array( $callback, $flags );
	return $oldVal;
}
/**
 * List the identifiers under which link hooks are currently registered.
 *
 * @return array Keys of the link hook registry, i.e. the $ns values that
 *   were passed to setLinkHook()
 */
function getLinkHooks() {
	$identifiers = array_keys( $this->mLinkHooks );
	return $identifiers;
}
/**
 * Process [[ ]] wikilinks
 *
 * Walks over $s looking for "[[" and "]]". Opening brackets are pushed onto
 * a stack; every closing bracket is paired with the most recently opened
 * one. When the text before the first "|" is made up of legal title
 * characters, the whole link is replaced by a "<!-- LINKMARKER n -->"
 * placeholder. After the scan all placeholders are expanded through
 * replaceInternalLinksCallback().
 *
 * @param $s String: wikitext, modified in place
 * @return LinkHolderArray
 * @throws MWException when $this->mTitle is null
 *
 * @private
 */
function replaceInternalLinks2( &$s ) {
	wfProfileIn( __METHOD__ );

	wfProfileIn( __METHOD__.'-setup' );
	static $tc = false, $titleRegex;
	if ( !$tc ) {
		# the % is needed to support urlencoded titles as well
		$tc = Title::legalChars() . '#%';
		# Match a valid plain title
		$titleRegex = "/^([{$tc}]+)$/sD";
	}

	$holders = new LinkHolderArray( $this );

	if ( is_null( $this->mTitle ) ) {
		# Close the sections innermost first so the profiling calls stay
		# properly nested (they used to be closed in the opposite order)
		wfProfileOut( __METHOD__.'-setup' );
		wfProfileOut( __METHOD__ );
		throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
	}

	wfProfileOut( __METHOD__.'-setup' );

	$offset = 0;
	$offsetStack = array();
	# Objects are passed by handle, so no reference to $this is needed here
	$markers = new LinkMarkerReplacer( $this, $holders, array( $this, 'replaceInternalLinksCallback' ) );
	while ( true ) {
		$startBracketOffset = strpos( $s, '[[', $offset );
		$endBracketOffset = strpos( $s, ']]', $offset );

		if ( $startBracketOffset === false && $endBracketOffset === false ) {
			# Finish when there are no more brackets
			break;
		} elseif ( $startBracketOffset !== false && $endBracketOffset !== false ) {
			# When we find both, use the first one
			$isStart = $startBracketOffset <= $endBracketOffset;
		} else {
			# When we only found one, check which it is
			$isStart = $startBracketOffset !== false;
		}
		$bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset;

		if ( $isStart ) {
			/** Opening bracket **/
			# Just push our current offset in the string onto the stack
			$offsetStack[] = $startBracketOffset;
		} else {
			/** Closing bracket **/
			# Pop the start pos for our current link zone off the stack;
			# array_pop() yields null when there is no open bracket
			$startBracketOffset = array_pop( $offsetStack );
			# Just to clean up the code, let's place offsets on the outer ends
			$endBracketOffset += 2;

			# Only do logic if we actually have an opening bracket for this
			if ( isset( $startBracketOffset ) ) {
				# Extract text inside the link. Padding to two entries leaves
				# $paramText null when there is no "|", without having to
				# suppress the notice list() would otherwise raise.
				$inner = substr( $s, $startBracketOffset + 2, $endBracketOffset - $startBracketOffset - 4 );
				list( $titleText, $paramText ) = array_pad( explode( '|', $inner, 2 ), 2, null );

				# Create markers only for valid links
				if ( preg_match( $titleRegex, $titleText ) ) {
					# Store the text for the marker
					$marker = $markers->addMarker( $titleText, $paramText );
					# Replace the current link with the marker
					$s = substr( $s, 0, $startBracketOffset ) .
						$marker .
						substr( $s, $endBracketOffset );
					# We have modified $s, because of this we need to set the
					# offset manually since the end position is different now
					$offset = $startBracketOffset + strlen( $marker );
					continue;
				}
				# ToDo: Some LinkHooks may allow recursive links inside of
				# the link text, create a regex that also matches our
				# <!-- LINKMARKER ### --> sequence in titles
				# ToDo: Some LinkHooks use patterns rather than namespaces
				# these need to be tested at this point here
			}
		}

		# Bump our offset to after our current bracket
		$offset = $bracketOffset + 2;
	}

	# Now expand our tree
	wfProfileIn( __METHOD__.'-expand' );
	$s = $markers->expand( $s );
	wfProfileOut( __METHOD__.'-expand' );

	wfProfileOut( __METHOD__ );
	return $holders;
}
/**
 * Turn one stored link marker back into output.
 *
 * Called through LinkMarkerReplacer for every marker created by
 * replaceInternalLinks2(). Looks up the link hook registered for the
 * namespace of the title and lets it decide how the link is rendered.
 *
 * @param $parser Parser handed to the link hook
 * @param $holders LinkHolderArray handed to the link hook
 * @param $markers LinkMarkerReplacer handed to the link hook
 * @param $titleText String: text before the first "|" of the link
 * @param $paramText String|null: text after the first "|", null if there was none
 * @return mixed The value produced by the link hook (or by
 *   CoreLinkFunctions::defaultLinkHook), or the plain wikitext of the link
 *   when it is not a valid internal link or the hook returned false
 * @throws MWException when the registered hook is not callable
 */
function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) {
	wfProfileIn( __METHOD__ );
	$wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
	wfProfileIn( __METHOD__."-misc" );
	# Don't allow internal links to pages containing
	# PROTO: where PROTO is a valid URL protocol; these
	# should be external links.
	if ( preg_match( '/^\b(?i:' . wfUrlProtocols() . ')/', $titleText ) ) {
		# Close the inner section too; it used to be left open on this path
		wfProfileOut( __METHOD__."-misc" );
		wfProfileOut( __METHOD__ );
		return $wt;
	}

	# Make subpage if necessary
	if ( $this->areSubpagesAllowed() ) {
		$titleText = $this->maybeDoSubpageLink( $titleText, $paramText );
	}

	# Check for a leading colon and strip it if it is there.
	# Guard against an empty string before looking at the first character.
	$leadingColon = $titleText !== '' && $titleText[0] == ':';
	if ( $leadingColon ) {
		$titleText = substr( $titleText, 1 );
	}

	wfProfileOut( __METHOD__."-misc" );
	# Make title object
	wfProfileIn( __METHOD__."-title" );
	$title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) );
	if ( !$title ) {
		wfProfileOut( __METHOD__."-title" );
		wfProfileOut( __METHOD__ );
		return $wt;
	}
	$ns = $title->getNamespace();
	wfProfileOut( __METHOD__."-title" );

	# Default for Namespaces is a default link
	# ToDo: Default for patterns is plain wikitext
	$return = true;
	if ( isset( $this->mLinkHooks[$ns] ) ) {
		list( $callback, $flags ) = $this->mLinkHooks[$ns];
		if ( $flags & self::SLH_PATTERN ) {
			# Pattern hooks do not get the Title object
			$args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon );
		} else {
			$args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
		}
		# Workaround for PHP bug 35229 and similar
		if ( !is_callable( $callback ) ) {
			# Do not leave the profiling section open when bailing out
			wfProfileOut( __METHOD__ );
			# NOTE(review): the message says "Tag hook" although this is a
			# link hook; left untouched in case something matches on it.
			throw new MWException( "Tag hook for namespace $ns is not callable\n" );
		}
		$return = call_user_func_array( $callback, $args );
	}
	if ( $return === true ) {
		# True (treat as plain link) was returned, call the defaultLinkHook
		$return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title,
			$titleText, $paramText, $leadingColon );
	}
	if ( $return === false ) {
		# False (no link) was returned, output plain wikitext
		# Build it again as the hook is allowed to modify $paramText
		$return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
	}
	# Content was returned, return it
	wfProfileOut( __METHOD__ );
	return $return;
}
/**
 * Keeps track of "<!-- LINKMARKER n -->" placeholders.
 *
 * Every placeholder stands for one link (title text plus parameter text).
 * expand() later replaces each placeholder in a string by whatever the
 * callback given to the constructor returns for the stored link.
 */
class LinkMarkerReplacer {

	protected $markers, $nextId, $parser, $holders, $callback;

	/**
	 * @param $parser Mixed: passed through as first argument of $callback
	 * @param $holders Mixed: passed through as second argument of $callback
	 * @param $callback Callback: invoked as
	 *   callback( $parser, $holders, $this, $titleText, $paramText )
	 */
	public function __construct( $parser, $holders, $callback ) {
		$this->nextId = 0;
		$this->markers = array();
		$this->parser = $parser;
		$this->holders = $holders;
		$this->callback = $callback;
	}

	/**
	 * Remember a link and hand out the placeholder that stands for it.
	 *
	 * @param $titleText String
	 * @param $paramText String|null
	 * @return String: the placeholder, "<!-- LINKMARKER n -->"
	 */
	public function addMarker( $titleText, $paramText ) {
		$id = $this->nextId++;
		$this->markers[$id] = array( $titleText, $paramText );
		return "<!-- LINKMARKER $id -->";
	}

	/**
	 * Tell whether the string contains something shaped like a placeholder.
	 *
	 * @param $string String
	 * @return Boolean
	 */
	public function findMarker( $string ) {
		return (bool) preg_match( '/<!-- LINKMARKER [0-9]+ -->/', $string );
	}

	/**
	 * Replace every placeholder in the string via callback().
	 *
	 * @param $string String
	 * @return String
	 */
	public function expand( $string ) {
		# Objects are passed by handle, so no reference to $this is needed
		return StringUtils::delimiterReplaceCallback( "<!-- LINKMARKER ", " -->", array( $this, 'callback' ), $string );
	}

	/**
	 * Expand a single placeholder.
	 *
	 * @param $m Array: $m[0] is used as the complete placeholder text and
	 *   $m[1] as the text between the delimiters (presumably the layout
	 *   StringUtils::delimiterReplaceCallback() passes in -- confirm)
	 * @return mixed Result of the callback, or $m[0] unchanged when the
	 *   placeholder does not refer to a stored link
	 */
	public function callback( $m ) {
		# Only a plain decimal number can be one of our ids. intval() alone
		# would turn any non-numeric text into 0 and thereby expand a
		# foreign "<!-- LINKMARKER foo -->" as link number 0.
		if ( !isset( $m[1] ) || !preg_match( '/^[0-9]+$/D', $m[1] ) ) {
			return $m[0];
		}
		$id = intval( $m[1] );
		if ( !array_key_exists( $id, $this->markers ) ) {
			return $m[0];
		}
		# Argument order: parser, holders, this replacer, title text, param text
		$args = array_merge(
			array( $this->parser, $this->holders, $this ),
			$this->markers[$id]
		);
		return call_user_func_array( $this->callback, $args );
	}
}