Initial import; Bengali (incomplete) from meta
[mediawiki.git] / includes / Tokenizer.php
blob4e8cf560e8b196b618498cc9d7b270d54c7e74fe
1 <?php
# Splits a string into a stream of plain-text runs and markup delimiters.
# The delimiters recognised are "[[", "]]", "''", "'''" and "'''''".
#
# Every token is an associative array:
#   "pos"   => byte offset in the source string at which the token starts
#   "type"  => "text" for a text run, otherwise the delimiter string itself
#   "text"  => the text run (text tokens only)
#   "mPos"  => byte offset just past the token (cursor value after consuming it)
#   "mMPos" => index of the next unconsumed delimiter match (delimiter tokens only)
#
# All offsets are BYTE offsets: preg_match_all() with PREG_OFFSET_CAPTURE
# reports byte positions, so strlen()/substr() are used on purpose here.
class Tokenizer {
	# $mText: source string; $mPos: current byte offset into it;
	# $mTextLength: strlen() of $mText.
	/* private */ var $mText, $mPos, $mTextLength;
	# $mCount: number of delimiter matches found by preParse().
	# $mM: the preg_match_all() result ("M" = matches); $mM[0][$i] is
	#      array( matched delimiter, byte offset of the match ).
	# $mMPos: index into $mM[0] of the next delimiter not yet consumed.
	/* private */ var $mCount, $mM, $mMPos;

	# Puts the tokenizer into a well-defined empty state, so that a bare
	# "new Tokenizer()" yields no tokens instead of reading unset members.
	function __construct()
	{
		$this->mText = "";
		$this->mTextLength = 0;
		$this->mPos = 0;
		$this->mCount = 0;
		$this->mM = array( array() );
		$this->mMPos = 0;
	}

	# Old-style (class-named) constructor. PHP 8 no longer treats this as a
	# constructor, so the real initialisation lives in __construct(); this
	# method is kept only so explicit calls to it keep working.
	/* private */ function Tokenizer()
	{
		$this->__construct();
	}

	# factory function: returns a Tokenizer positioned at the start of $s.
	# Declared static because it is meant to be called as
	# Tokenizer::newFromString( $s ); PHP 8 rejects static calls to
	# non-static methods.
	static function newFromString( $s )
	{
		$t = new Tokenizer();
		$t->mText = $s;
		$t->mTextLength = strlen( $s );
		$t->preParse();
		return $t;
	}

	# Locates every delimiter in $mText up front and rewinds the match index.
	# The alternation lists the longest quote run first so that "'''''" is
	# matched as one delimiter rather than as "'''" followed by "''".
	function preParse()
	{
		$count = preg_match_all( "/(\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/",
				$this->mText, $this->mM,
				PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE );
		if ( $count === false ) {
			# PCRE reported an error: behave as if no delimiter was found,
			# so the whole string comes out as a single text token.
			$count = 0;
			$this->mM = array( array() );
		}
		$this->mCount = $count;
		$this->mMPos = 0;
	}

	# Returns the next token and advances past it, or returns false once
	# the input is exhausted.
	function nextToken()
	{
		$token = $this->previewToken();
		if ( $token !== false ) {
			$this->mPos = $token["mPos"];
			if ( $token["type"] !== "text" ) {
				# Only delimiter tokens use up an entry of the match list.
				$this->mMPos = $token["mMPos"];
			}
		}
		return $token;
	}

	# Returns the token nextToken() would return, without consuming it.
	# Returns false when there is no input left.
	function previewToken()
	{
		if ( $this->mMPos < $this->mCount ) {
			# At least one delimiter is still ahead of (or at) the cursor.
			# Strictly "<": index $mCount is one past the last match.
			$delim    = $this->mM[0][$this->mMPos][0];
			$delimPos = $this->mM[0][$this->mMPos][1];

			$token = array( "pos" => $this->mPos );
			if ( $this->mPos < $delimPos ) {
				# Plain text lies between the cursor and the next delimiter.
				$token["type"] = "text";
				$token["text"] = substr( $this->mText, $this->mPos,
						$delimPos - $this->mPos );
				$token["mPos"] = $delimPos;
			} else {
				# The cursor sits on the delimiter itself.
				$token["type"] = $delim;
				$token["mPos"] = $this->mPos + strlen( $delim );
				$token["mMPos"] = $this->mMPos + 1;
			}
		} elseif ( $this->mPos < $this->mTextLength ) {
			# No delimiters left: the remainder of the string is one text run.
			$token = array(
				"pos"  => $this->mPos,
				"type" => "text",
				"text" => substr( $this->mText, $this->mPos ),
				"mPos" => $this->mTextLength
			);
		} else {
			$token = false;
		}
		return $token;
	}
}