# Tokenizer state: the input text, a cursor into it, and the regex match
# table (with its own cursor) that preParse fills in.
3 /* private */ var $mText, # Text to be processed by the tokenizer
4 $mPos, # current position (offset) of the tokenizer in $mText
5 $mTextLength, # Length of $mText; newFromString stores a strlen() result here
6 $mCount, # number of regex matches (preg_match_all return value), computed in preParse
7 $mMatch, # matches of the tokenizer regex with their offsets, computed in preParse
8 $mMatchPos; # index into $mMatch of the next match to hand out. Each match can
9 # yield up to two tokens: the plain text preceding it and the matched token itself.
11 /* private */ function Tokenizer()
# Visible fragment of newFromString( $s ): records strlen( $s ), i.e. the
# length of the argument in bytes, in the mTextLength member of $t.
# NOTE(review): $t is created on lines not shown here; presumably a new
# Tokenizer instance -- confirm.
17 function newFromString( $s )
22 $t->mTextLength
= strlen( $s );
# Assembles the tokenizer regex as a list of "|"-separated alternatives and
# runs it once over $this->mText. The number of matches is stored in
# $this->mCount and the match table in $this->mMatch.
30 # build up the regex, step by step.
31 # Basic features: Quotes for <em>/<strong> and hyphens for <hr>
# Alternatives: five, three or two apostrophes, or a newline followed by
# four or more hyphens (the leading "\n" is part of the match).
32 $regex = "\'\'\'\'\'|\'\'\'|\'\'|\n-----*";
33 # Append regex for linkPrefixExtension
34 if ( $wgLang->linkPrefixExtension() ) {
# Captures a run of ASCII letters / bytes 0x80-0xff directly in front of "[[".
# Once the whole pattern is wrapped in "(" ... ")" below, this is capture
# group 2, which previewToken reads as $this->mMatch[2].
35 $regex .= "|([a-zA-Z\x80-\xff]+)\[\[";
41 # Magic words that automatically generate links
# Both literals include the trailing space.
42 $regex .= "|ISBN |RFC ";
43 # Language-specific additions
# NOTE(review): appended without a "|" separator, so the returned fragment
# presumably starts with "|" or is empty -- confirm against tokenizerRegex().
44 $regex .= $wgLang->tokenizerRegex();
# Wrap all alternatives in a single capturing group (group 1) and add the
# "/" delimiters; no pattern modifiers are used.
46 $regex = "/(" . $regex . ")/";
48 # Apply the regex to the text
# PREG_PATTERN_ORDER: $this->mMatch[group][matchIndex].
# PREG_OFFSET_CAPTURE: each entry is array( matched string, offset in text ).
# The return value is the number of full-pattern matches found.
49 $this->mCount
= preg_match_all( $regex, $this->mText
, $this->mMatch
,
50 PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE
);
# Consume the upcoming token: obtain it from previewToken(), then commit the
# cursor values the preview carries ("mMatchPos" and "mPos") to this object.
56 $token = $this->previewToken();
58 $this->mMatchPos
= $token["mMatchPos"];
59 $this->mPos
= $token["mPos"];
# Builds the $token array for the upcoming token without moving the
# tokenizer. Keys set in the visible code:
#   "type"      - "text", "[[" (prefixed link), "----", or the matched string
#   "text"      - the plain text or the link prefix, where applicable
#   "pos"       - $this->mPos at the time of the call (first branch only)
#   "mPos", "mMatchPos" - the cursor values the caller should adopt to
#                 actually consume this token
# Some lines of this function (including parts of the branch structure and
# its end) are not shown in this listing.
65 function previewToken()
# NOTE(review): mCount is the preg_match_all() return value, so the valid
# indexes into mMatch[0] are 0..mCount-1; "<=" also admits
# mMatchPos == mCount -- confirm whether "<" was intended.
67 if ( $this->mMatchPos
<= $this->mCount
) {
68 $token["pos"] = $this->mPos
;
# mMatch[0][i][1] is the offset of match i in the text. If the cursor is
# still in front of it, the next token is the plain text up to the match.
69 if ( $this->mPos
< $this->mMatch
[0][$this->mMatchPos
][1] ) {
70 $token["type"] = "text";
71 $token["text"] = substr( $this->mText
, $this->mPos
,
72 $this->mMatch
[0][$this->mMatchPos
][1] - $this->mPos
);
73 # What the pointers would change to if this were not just a preview:
# the same match stays pending, the cursor moves to where the match starts.
74 $token["mMatchPos"] = $this->mMatchPos
;
75 $token["mPos"] = $this->mMatch
[0][$this->mMatchPos
][1];
77 # If linkPrefixExtension is set, $this->mMatch[2][$this->mMatchPos][0]
78 # contains the link prefix, or a falsy value if no link prefix exists.
79 if ( $this->mMatch
[2][$this->mMatchPos
][0] )
81 # prefixed link open tag, [0] is "prefix[["
82 $token["type"] = "[[";
83 $token["text"] = $this->mMatch
[2][$this->mMatchPos
][0]; # the prefix
# Otherwise the matched string itself serves as the token type.
85 $token["type"] = $this->mMatch
[0][$this->mMatchPos
][0];
# The hyphen alternative of the regex starts with "\n", so skip the first
# character and compare the following four.
86 if ( substr($token["type"],1,4) == "----" )
88 # A run of four or more hyphens is an <HR>; normalize the type to exactly four.
90 $token["type"]="----";
93 # What the pointers would change to if this were not just a preview:
# the cursor moves past the matched string and the next match becomes pending.
94 $token["mPos"] = $this->mPos +
strlen( $this->mMatch
[0][$this->mMatchPos
][0] );
95 $token["mMatchPos"] = $this->mMatchPos +
1;
# No match left to examine, but unread text remains: return the rest of the
# text as one final "text" token.
97 } elseif ( $this->mPos
< $this->mTextLength
) {
98 $token["type"] = "text";
99 $token["text"] = substr( $this->mText
, $this->mPos
);
100 # What the pointers would change to if this were not just a preview:
# the cursor moves to the end of the text, the match index is unchanged.
101 $token["mPos"] = $this->mTextLength
;
102 $token["mMatchPos"] = $this->mMatchPos
;