don't parse blank ISBNs

[lhc/web/wiklou.git] / includes / Tokenizer.php
diff --git a/includes/Tokenizer.php b/includes/Tokenizer.php

index 129d5e0..84a2f06 100644 (file)
--- a/includes/Tokenizer.php
+++ b/includes/Tokenizer.php
@@ -1,4 +1,13 @@
  <?php
+/**
+ *
+ * @package MediaWiki
+ */
+
+/**
+ *
+ * @package MediaWiki
+ */
  class Tokenizer {
         /* private */ var $mText,               # Text to be processed by the tokenizer
                           $mPos,                # current position of tokenizer in text
@@ -6,8 +15,11 @@ class Tokenizer {
                           $mQueuedToken;        # Tokens that were already found, but not
                                                 # returned yet.
  
-       /* private */ function Tokenizer()
-       {
+       /**
+        * Constructor
+        * @access private
+        */
+       function Tokenizer() {
                 global $wgLang;
  
                 $this->mPos=0;
@@ -15,9 +27,10 @@ class Tokenizer {
                 $this->linkPrefixExtension = $wgLang->linkPrefixExtension();
         }
  
-       # factory function
-       function newFromString( $s )
-       {
+       /**
+        * factory function
+        */
+       function newFromString( $s ) {
                 $fname = 'Tokenizer::newFromString';
                 wfProfileIn( $fname );
  
@@ -30,13 +43,14 @@ class Tokenizer {
         }
  
  
-       // Return the next token, but do not increase the pointer. The next call
-       // to previewToken or nextToken will return the same token again.
-       // Actually, the pointer is increased, but the token is queued. The next
-       // call to previewToken or nextToken will check the queue and return
-       // the stored token.
-       function previewToken()
-       {
+       /**
+        * Return the next token, but do not increase the pointer. The next call
+        * to previewToken or nextToken will return the same token again.
+        * Actually, the pointer is increased, but the token is queued. The next
+        * call to previewToken or nextToken will check the queue and return
+        * the stored token.
+        */
+       function previewToken() {
                 $fname = 'Tokenizer::previewToken';
                 wfProfileIn( $fname );
  
@@ -53,13 +67,15 @@ class Tokenizer {
         }
  
  
-       // get the next token
-       // proceeds character by character through the text, looking for characters needing
-       // special attention. Those are currently: I, R, ', [, ], newline
-       //
-       // TODO:  handling of French blanks not yet implemented
-       function nextToken()
-       {
+       /**
+        * Get the next token.
+        *
+        * Proceeds character by character through the text, looking for characters needing
+        * special attention. Those are currently: I, R, ', [, ], newline
+        *
+        * @todo handling of French blanks not yet implemented
+        */
+       function nextToken() {
                 $fname = 'Tokenizer::nextToken';
                 wfProfileIn( $fname );
  
@@ -67,7 +83,7 @@ class Tokenizer {
                         // still one token from the last round around. Return that one first.
                         $token = array_shift( $this->mQueuedToken );
                 } else if ( $this->mPos > $this->mTextLength ) {
-                       // If no text is left, return "false".
+                       // If no text is left, return boolean false.
                         $token = false;
                 } else {
  
@@ -185,7 +201,7 @@ class Tokenizer {
                                                         // strip blank from Token
                                                         $token['text'] = substr( $token['text'], 0, -1 );
                                                         $queueToken['type'] = 'blank';
-                                                       $queueToken['text'] = " {$ch}";
+                                                       $queueToken['text'] = ' '.$ch;
                                                         $this->mQueuedToken[] = $queueToken;
                                                         $this->mPos ++;
                                                         break 2; // switch + while
@@ -202,7 +218,7 @@ class Tokenizer {
                                         case '8':
                                         case '9':
                                                 if (    ($this->mTextLength >= $this->mPos +2)
-                                                    && ($this->mText[$this->mPos+1] == " ")
+                                                    && ($this->mText[$this->mPos+1] == ' ')
                                                      && ctype_digit( $this->mText[$this->mPos+2] ) )
                                                 {
                                                         $queueToken['type'] = 'blank';
@@ -235,10 +251,10 @@ class Tokenizer {
                                                 break;
                                         case '&': //extensions like <timeline>, since HTML stripping has already been done, 
                                                   //those look like &lt;timeline&gt;
-                                               if ( $this->continues( "lt;timeline&gt;" ) )
+                                               if ( $this->continues( 'lt;timeline&gt;' ) )
                                                 {
-                                                       $queueToken['type'] = "<timeline>";
-                                                       $queueToken['text'] = "&lt;timeline&gt;";
+                                                       $queueToken['type'] = '<timeline>';
+                                                       $queueToken['text'] = '&lt;timeline&gt;';
                                                         $this->mQueuedToken[] = $queueToken;
                                                         $this->mPos += 16;
                                                         break 2; // switch + while
@@ -256,10 +272,14 @@ class Tokenizer {
                 return $token;
         }
  
-       // function continues
-       // checks whether the mText continues with $cont from mPos+1
-       /* private */ function continues( $cont )
-       {
+       /**
+        * function continues
+        *
+        * checks whether the mText continues with $cont from mPos+1
+        *
+        * @access private
+        */
+       function continues( $cont ) {
                 // If string is not long enough to contain $cont, return false
                 if ( $this->mTextLength < $this->mPos + strlen( $cont ) )
                         return false;
@@ -271,10 +291,14 @@ class Tokenizer {
                 return true;
         }
  
-       // function preceeded
-       // checks whether the mText is preceeded by $prec at position mPos
-       /* private */ function preceeded( $prec )
-       {
+       /**
+        * function preceeded
+        *
+        * checks whether the mText is preceded by $prec at position mPos
+        *
+        * @access private
+        */
+       function preceeded( $prec ) {
                 $len = strlen( $prec );
                 // if $prec is longer than the text up to mPos, return false
                 if ( $this->mPos < $len )
@@ -282,8 +306,10 @@ class Tokenizer {
                 return ( 0 == strcmp( $prec, substr($this->mText, $this->mPos-$len, $len) ) );
         }
  
-       function readAllUntil( $border )
-       {
+       /**
+        *
+        */
+       function readAllUntil( $border ) {
                 $n = strpos( $this->mText, $border, $this->mPos );
                 if ( $n === false )
                         return '';