<?php
/**
 * Modified version of the PHP parser with hooks for wiki links; experimental
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
/**
 * Parser with LinkHooks experiment.
 *
 * Extends the stock Parser so that [[...]] wikilinks are first replaced by
 * opaque markers and then rendered through per-namespace "link hook"
 * callbacks registered with setLinkHook().
 */
class Parser_LinkHooks extends Parser {
	/**
	 * Update this version number when the ParserOutput format
	 * changes in an incompatible way, so the parser cache
	 * can automatically discard old data.
	 */
	const VERSION = '1.6.4';

	# Flags for Parser::setLinkHook
	# Also available as global constants from Defines.php
	const SLH_PATTERN = 1;

	# Constants needed for external link processing
	# Everything except bracket, space, or control characters
	const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
	const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
		\\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';

	/**
	 * Registered link hooks, keyed by namespace index (or by regex pattern
	 * when SLH_PATTERN is set). Each value is array( $callback, $flags ).
	 */
	public $mLinkHooks = array();

	/**
	 * Constructor
	 *
	 * @param $conf Array: configuration, passed through to Parser::__construct()
	 */
	public function __construct( $conf = array() ) {
		parent::__construct( $conf );
		$this->mLinkHooks = array();
	}

	/**
	 * Do various kinds of initialisation on the first call of the parser
	 *
	 * Does nothing unless $this->mFirstCall is still set. The parent
	 * constructor is deliberately NOT re-run here: it already ran in
	 * __construct() with the caller's $conf, and calling it again without
	 * arguments would discard that configuration.
	 */
	function firstCallInit() {
		if ( !$this->mFirstCall ) {
			return;
		}
		$this->mFirstCall = false;

		wfProfileIn( __METHOD__ );

		$this->setHook( 'pre', array( $this, 'renderPreTag' ) );
		CoreParserFunctions::register( $this );
		CoreLinkFunctions::register( $this );
		$this->initialiseVariables();

		# Hook handlers receive the parser by reference; $this itself cannot
		# safely be referenced on PHP >= 7.1, so hand out an alias instead.
		$parser = $this;
		wfRunHooks( 'ParserFirstCallInit', array( &$parser ) );
		wfProfileOut( __METHOD__ );
	}

	/**
	 * Create a link hook, e.g. [[Namespace:...|display]]
	 * The callback function should have the form:
	 *    function myLinkCallback( $parser, $holders, $markers,
	 *      Title $title, $titleText, &$paramText = null, &$leadingColon = false ) { ... }
	 *
	 * Or with SLH_PATTERN:
	 *    function myLinkCallback( $parser, $holders, $markers,
	 *      &$titleText, &$paramText = null, &$leadingColon = false ) { ... }
	 *
	 * The callback may return one of the following values:
	 *   String) Text result of the link
	 *   True) (Treat as link) Parse the link according to normal link rules
	 *   False) (Bad link) Just output the raw wikitext (You may modify the text first)
	 *
	 * @param $ns Integer or String: the Namespace ID or regex pattern if SLH_PATTERN is set
	 * @param $callback Mixed: the callback function (and object) to use
	 * @param $flags Integer: a combination of the following flags:
	 *     SLH_PATTERN   Use a regex link pattern rather than a namespace
	 *
	 * @throws MWException if $ns has the wrong type for the given flags
	 * @return callback|null The old callback function for this name, if any
	 */
	public function setLinkHook( $ns, $callback, $flags = 0 ) {
		$isPattern = (bool)( $flags & self::SLH_PATTERN );
		if ( $isPattern && !is_string( $ns ) ) {
			throw new MWException( __METHOD__ . '() expecting a regex string pattern.' );
		} elseif ( !$isPattern && !is_int( $ns ) ) {
			# The previous test was "$flags | ~SLH_PATTERN", which is always
			# non-zero and therefore rejected every pattern hook as well.
			throw new MWException( __METHOD__ . '() expecting a namespace index.' );
		}
		$oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null;
		$this->mLinkHooks[$ns] = array( $callback, $flags );
		return $oldVal;
	}

	/**
	 * Get all registered link hook identifiers
	 *
	 * @return array Keys of the hook table (namespace indexes / patterns)
	 */
	function getLinkHooks() {
		return array_keys( $this->mLinkHooks );
	}

	/**
	 * Process [[ ]] wikilinks
	 *
	 * Scans $s for "[[" / "]]" pairs, replaces every link whose title part
	 * consists of legal title characters with a LINKMARKER comment, and then
	 * expands the markers through replaceInternalLinksCallback().
	 *
	 * @param $s String: wikitext, modified in place
	 * @throws MWException if $this->mTitle is null
	 * @return LinkHolderArray
	 */
	function replaceInternalLinks2( &$s ) {
		wfProfileIn( __METHOD__ );

		wfProfileIn( __METHOD__ . '-setup' );
		# Built once and kept for later calls
		static $tc = false, $titleRegex;
		if ( !$tc ) {
			# the % is needed to support urlencoded titles as well
			$tc = Title::legalChars() . '#%';
			# Match a valid plain title
			$titleRegex = "/^([{$tc}]+)$/sD";
		}

		$holders = new LinkHolderArray( $this );

		if ( is_null( $this->mTitle ) ) {
			wfProfileOut( __METHOD__ . '-setup' );
			wfProfileOut( __METHOD__ );
			throw new MWException( __METHOD__ . ": \$this->mTitle is null\n" );
		}

		wfProfileOut( __METHOD__ . '-setup' );

		$offset = 0;
		$offsetStack = array();
		$markers = new LinkMarkerReplacer( $this, $holders, array( $this, 'replaceInternalLinksCallback' ) );
		while ( true ) {
			$startBracketOffset = strpos( $s, '[[', $offset );
			$endBracketOffset = strpos( $s, ']]', $offset );
			# Finish when there are no more brackets
			if ( $startBracketOffset === false && $endBracketOffset === false ) {
				break;
			}
			# Determine if the bracket is a starting or ending bracket
			if ( $startBracketOffset !== false && $endBracketOffset !== false ) {
				# When we find both, use the first one
				$isStart = $startBracketOffset <= $endBracketOffset;
			} else {
				# When we only found one, check which it is
				$isStart = $startBracketOffset !== false;
			}
			$bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset;

			if ( $isStart ) {
				/** Opening bracket **/
				# Just push our current offset in the string onto the stack
				$offsetStack[] = $startBracketOffset;
			} else {
				/** Closing bracket **/
				# Pop the start pos for our current link zone off the stack
				# (null when there is no unmatched opening bracket)
				$startBracketOffset = array_pop( $offsetStack );
				# Just to clean up the code, lets place offsets on the outer ends
				$endBracketOffset += 2;

				# Only do logic if we actually have a opening bracket for this
				if ( $startBracketOffset !== null ) {
					# Extract text inside the link
					$inner = substr( $s, $startBracketOffset + 2,
						$endBracketOffset - $startBracketOffset - 4 );
					$parts = explode( '|', $inner, 2 );
					$titleText = $parts[0];
					$paramText = isset( $parts[1] ) ? $parts[1] : null;
					# Create markers only for valid links
					if ( preg_match( $titleRegex, $titleText ) ) {
						# Store the text for the marker
						$marker = $markers->addMarker( $titleText, $paramText );
						# Replace the current link with the marker
						$s = substr( $s, 0, $startBracketOffset ) .
							$marker .
							substr( $s, $endBracketOffset );
						# We have modified $s, because of this we need to set the
						# offset manually since the end position is different now
						$offset = $startBracketOffset + strlen( $marker );
						continue;
					}
					# ToDo: Some LinkHooks may allow recursive links inside of
					# the link text, create a regex that also matches our
					# <!-- LINKMARKER ### --> sequence in titles
					# ToDo: Some LinkHooks use patterns rather than namespaces
					# these need to be tested at this point here
				}
			}
			# Bump our offset to after our current bracket
			$offset = $bracketOffset + 2;
		}

		# Now expand our tree
		wfProfileIn( __METHOD__ . '-expand' );
		$s = $markers->expand( $s );
		wfProfileOut( __METHOD__ . '-expand' );

		wfProfileOut( __METHOD__ );
		return $holders;
	}

	/**
	 * Render one link that replaceInternalLinks2() turned into a marker.
	 *
	 * @param $parser Parser
	 * @param $holders LinkHolderArray
	 * @param $markers LinkMarkerReplacer
	 * @param $titleText String: text before the first "|"
	 * @param $paramText String|null: text after the first "|", null if there was none
	 * @throws MWException if the hook registered for the namespace is not callable
	 * @return String: hook output, or the raw wikitext when the link is not handled
	 */
	function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) {
		wfProfileIn( __METHOD__ );
		# Raw wikitext, returned whenever the link cannot be processed
		$wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
		wfProfileIn( __METHOD__ . "-misc" );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^\b(?i:' . wfUrlProtocols() . ')/', $titleText ) ) {
			wfProfileOut( __METHOD__ . "-misc" );
			wfProfileOut( __METHOD__ );
			return $wt;
		}

		# Make subpage if necessary
		if ( $this->areSubpagesAllowed() ) {
			$titleText = $this->maybeDoSubpageLink( $titleText, $paramText );
		}

		# Check for a leading colon and strip it if it is there
		# (guard against an empty string before indexing into it)
		$leadingColon = $titleText !== '' && $titleText[0] == ':';
		if ( $leadingColon ) {
			$titleText = substr( $titleText, 1 );
		}

		wfProfileOut( __METHOD__ . "-misc" );
		# Make title object
		wfProfileIn( __METHOD__ . "-title" );
		$title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) );
		if ( !$title ) {
			wfProfileOut( __METHOD__ . "-title" );
			wfProfileOut( __METHOD__ );
			return $wt;
		}
		$ns = $title->getNamespace();
		wfProfileOut( __METHOD__ . "-title" );

		# Default for Namespaces is a default link
		# ToDo: Default for patterns is plain wikitext
		$return = true;
		if ( isset( $this->mLinkHooks[$ns] ) ) {
			list( $callback, $flags ) = $this->mLinkHooks[$ns];
			if ( $flags & self::SLH_PATTERN ) {
				$args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon );
			} else {
				$args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
			}
			# Workaround for PHP bug 35229 and similar
			if ( !is_callable( $callback ) ) {
				wfProfileOut( __METHOD__ );
				throw new MWException( "Link hook for namespace $ns is not callable\n" );
			}
			$return = call_user_func_array( $callback, $args );
		}
		if ( $return === true ) {
			# True (treat as plain link) was returned, call the defaultLinkHook
			$return = CoreLinkFunctions::defaultLinkHook( $parser, $holders, $markers, $title,
				$titleText, $paramText, $leadingColon );
		}
		if ( $return === false ) {
			# False (no link) was returned, output plain wikitext
			# Build it again as the hook is allowed to modify $paramText
			$return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
		}
		# Content was returned, return it
		wfProfileOut( __METHOD__ );
		return $return;
	}
}
/**
 * Hands out "<!-- LINKMARKER n -->" placeholders for links and later swaps
 * each placeholder for the result of a callback.
 */
class LinkMarkerReplacer {

	/**
	 * $markers  Array: marker id => array( $titleText, $paramText )
	 * $nextId   Integer: id given to the next marker
	 * $parser, $holders: passed through untouched to the callback
	 * $callback Callable invoked for every marker found by expand()
	 */
	protected $markers, $nextId, $parser, $holders, $callback;

	/**
	 * @param $parser Mixed: first argument handed to $callback
	 * @param $holders Mixed: second argument handed to $callback
	 * @param $callback Callable: called as
	 *     $callback( $parser, $holders, $this, $titleText, $paramText )
	 */
	function __construct( $parser, $holders, $callback ) {
		# Ids must start at a real integer; an unset counter would make the
		# first marker come out as "<!-- LINKMARKER  -->" with no id at all.
		$this->nextId = 0;
		$this->markers = array();
		$this->parser = $parser;
		$this->holders = $holders;
		$this->callback = $callback;
	}

	/**
	 * Remember a link and return the marker standing in for it.
	 *
	 * @param $titleText String
	 * @param $paramText String|null
	 * @return String: "<!-- LINKMARKER n -->"
	 */
	function addMarker( $titleText, $paramText ) {
		$id = $this->nextId++;
		$this->markers[$id] = array( $titleText, $paramText );
		return "<!-- LINKMARKER $id -->";
	}

	/**
	 * Tell whether a string contains at least one marker.
	 *
	 * @param $string String
	 * @return Boolean
	 */
	function findMarker( $string ) {
		return (bool)preg_match( '/<!-- LINKMARKER [0-9]+ -->/', $string );
	}

	/**
	 * Replace every marker in a string through callback().
	 *
	 * @param $string String
	 * @return String
	 */
	function expand( $string ) {
		return StringUtils::delimiterReplaceCallback(
			"<!-- LINKMARKER ", " -->", array( $this, 'callback' ), $string );
	}

	/**
	 * Replacement callback used by expand().
	 *
	 * @param $m Array: $m[0] is the whole marker, $m[1] the text between the delimiters
	 * @return Mixed: result of the user callback, or $m[0] unchanged for an unknown id
	 */
	function callback( $m ) {
		$id = intval( $m[1] );
		if ( !array_key_exists( $id, $this->markers ) ) {
			return $m[0];
		}
		# Prepend parser, holders and this replacer to ( $titleText, $paramText )
		$args = array_merge(
			array( $this->parser, $this->holders, $this ),
			$this->markers[$id]
		);
		return call_user_func_array( $this->callback, $args );
	}
}