3 /* private */ var $mText, # Text to be processed by the tokenizer
4 $mPos, # Current offset of the tokenizer within $mText
5 $mTextLength, # Length of $mText (set from strlen() in newFromString)
6 $mCount, # Number of regex matches (return value of preg_match_all), computed in preParse
7 $mMatch, # Matches of the tokenizer regex, with their offsets, computed in preParse
8 $mMatchPos; # Index into $mMatch of the next match not yet consumed. Each match
9 # can yield up to two tokens: the text preceding it, then the matched token itself.
11 /* private */ function Tokenizer()
17 function newFromString( $s )
22 $t->mTextLength
= strlen( $s );
30 # Build up the regex, step by step. Each feature below is appended as
# one more "|"-separated alternative.
31 # Basic features: Quotes for <em>/<strong> and hyphens for <hr>
# (five, three or two apostrophes, or a newline followed by four or more
# hyphens).
32 $regex = "\'\'\'\'\'|\'\'\'|\'\'|\n-----*";
33 # Append regex for linkPrefixExtension
# The parenthesised letter run directly before "[[" is the link prefix.
# Because the whole regex is wrapped in one outer group further down, this
# group ends up as capture group 2.
34 if ( $wgLang->linkPrefixExtension() ) {
35 $regex .= "|([a-zA-Z\x80-\xff]+)\[\[";
37 # end tag that can start with 3 [
42 # Magic words that automatically generate links
43 $regex .= "|ISBN |RFC ";
44 # Language-specific additions
45 $regex .= $wgLang->tokenizerRegex();
# Wrap all alternatives in a single capture group and add the delimiters.
47 $regex = "/(" . $regex . ")/";
49 # Apply the regex to the text
# With PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE, $this->mMatch[group][i] is
# array( matched string, offset in $this->mText ) for the i-th match, and
# the return value stored in $this->mCount is the number of full matches.
50 $this->mCount
= preg_match_all( $regex, $this->mText
, $this->mMatch
,
51 PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE
);
57 $token = $this->previewToken();
59 $this->mMatchPos
= $token["mMatchPos"];
60 $this->mPos
= $token["mPos"];
66 function previewToken()
68 if ( $this->mMatchPos
< $this->mCount
) {
69 $token["pos"] = $this->mPos
;
70 if ( $this->mPos
< $this->mMatch
[0][$this->mMatchPos
][1] ) {
71 $token["type"] = "text";
72 $token["text"] = substr( $this->mText
, $this->mPos
,
73 $this->mMatch
[0][$this->mMatchPos
][1] - $this->mPos
);
74 # What the pointers would change to if this were not just a preview
75 $token["mMatchPos"] = $this->mMatchPos
;
76 $token["mPos"] = $this->mMatch
[0][$this->mMatchPos
][1];
78 # If linkPrefixExtension is set, $this->mMatch[2][$this->mMatchPos][0]
79 # contains the link prefix, or is null if no link prefix exists.
80 if ( isset( $this->mMatch
[2] ) && $this->mMatch
[2][$this->mMatchPos
][0] )
82 # prefixed link open tag, [0] is "prefix[["
83 $token["type"] = "[[";
84 $token["text"] = $this->mMatch
[2][$this->mMatchPos
][0]; # the prefix
86 $token["type"] = $this->mMatch
[0][$this->mMatchPos
][0];
87 if ( substr($token["type"],1,4) == "----" )
89 # any run of four or more hyphens is an <HR>.
91 $token["type"]="----";
94 # What the pointers would change to if this were not just a preview
95 $token["mPos"] = $this->mPos +
strlen( $this->mMatch
[0][$this->mMatchPos
][0] );
96 $token["mMatchPos"] = $this->mMatchPos +
1;
98 } elseif ( $this->mPos
< $this->mTextLength
) {
99 $token["type"] = "text";
100 $token["text"] = substr( $this->mText
, $this->mPos
);
101 # What the pointers would change to if this were not just a preview
102 $token["mPos"] = $this->mTextLength
;
103 $token["mMatchPos"] = $this->mMatchPos
;