* Add wfReadOnlyReason() to supply the reason for the wiki being read only. Use it...

[lhc/web/wiklou.git] / includes / SearchUpdate.php
diff --git a/includes/SearchUpdate.php b/includes/SearchUpdate.php

index 08f66ea..849d6dc 100644 (file)
--- a/includes/SearchUpdate.php
+++ b/includes/SearchUpdate.php
@@ -1,13 +1,14 @@
  <?php
  <?php
-# See deferred.doc
-
+/**
+ * See deferred.txt
+ * @addtogroup Search
+ */
  class SearchUpdate {
  
         /* private */ var $mId = 0, $mNamespace, $mTitle, $mText;
         /* private */ var $mTitleWords;
  
  class SearchUpdate {
  
         /* private */ var $mId = 0, $mNamespace, $mTitle, $mText;
         /* private */ var $mTitleWords;
  
-       function SearchUpdate( $id, $title, $text = false )
-       {
+       function SearchUpdate( $id, $title, $text = false ) {
                 $nt = Title::newFromText( $title );
                 if( $nt ) {
                         $this->mId = $id;
                 $nt = Title::newFromText( $title );
                 if( $nt ) {
                         $this->mId = $id;
@@ -22,30 +23,33 @@ class SearchUpdate {
                 }
         }
  
                 }
         }
  
-       function doUpdate()
-       {
-               global $wgDBminWordLen, $wgLang, $wgDisableSearchUpdate;
+       function doUpdate() {
+               global $wgContLang, $wgDisableSearchUpdate;
  
                 if( $wgDisableSearchUpdate || !$this->mId ) {
                         return false;
                 }
  
                 if( $wgDisableSearchUpdate || !$this->mId ) {
                         return false;
                 }
-               $lc = SearchEngine::legalSearchChars() . "&#;";
-               if( $this->mText == false ) {
-                       # Just update the title
-                       $sql = "UPDATE LOW_PRIORITY searchindex SET si_title='" .
-                         wfStrencode( Title::indexTitle( $this->mNamespace, $this->mTitle ) ) .
-                         "' WHERE si_page={$this->mId}";
-                       wfQuery( $sql, DB_WRITE, "SearchUpdate::doUpdate" );
+               $fname = 'SearchUpdate::doUpdate';
+               wfProfileIn( $fname );
+
+               $search = SearchEngine::create();
+               $lc = SearchEngine::legalSearchChars() . '&#;';
+
+               if( $this->mText === false ) {
+                       $search->updateTitle($this->mId,
+                               Title::indexTitle( $this->mNamespace, $this->mTitle ));
+                       wfProfileOut( $fname );
                         return;
                 }
  
                 # Language-specific strip/conversion
                         return;
                 }
  
                 # Language-specific strip/conversion
-               $text = $wgLang->stripForSearch( $this->mText );
+               $text = $wgContLang->stripForSearch( $this->mText );
  
  
+               wfProfileIn( $fname.'-regexps' );
                 $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
                 $text = preg_replace( "/<\\/?\\s*[A-Za-z][A-Za-z0-9]*\\s*([^>]*?)>/",
-                 " ", strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
-               $text = preg_replace( "/(^|\\n)\\s*==\\s+([^\\n]+)\\s+==\\s/sD",
-                 "\\2 \\2 \\2 ", $text ); # Emphasize headings
+                 ' ', strtolower( " " . $text /*$this->mText*/ . " " ) ); # Strip HTML markup
+               $text = preg_replace( "/(^|\\n)==\\s*([^\\n]+)\\s*==(\\s)/sD",
+                 "\\1\\2 \\2 \\2\\3", $text ); # Emphasize headings
  
                 # Strip external URLs
                 $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
  
                 # Strip external URLs
                 $uc = "A-Za-z0-9_\\/:.,~%\\-+&;#?!=()@\\xA0-\\xFF";
@@ -69,17 +73,43 @@ class SearchUpdate {
                 $text = preg_replace( "/[^{$lc}]+/", " ", $text );
  
                 # Handle 's, s'
                 $text = preg_replace( "/[^{$lc}]+/", " ", $text );
  
                 # Handle 's, s'
-               $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
-               $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
+               #
+               #   $text = preg_replace( "/([{$lc}]+)'s /", "\\1 \\1's ", $text );
+               #   $text = preg_replace( "/([{$lc}]+)s' /", "\\1s ", $text );
+               #
+               # These tail-anchored regexps are insanely slow. The worst case comes
+               # when Japanese or Chinese text (i.e., no word spacing) is written on
+               # a wiki configured for Western UTF-8 mode. The Unicode characters are
+               # expanded to hex codes and the "words" are very long paragraph-length
+               # monstrosities. On a large page the above regexps may take over 20
+               # seconds *each* on a 1GHz-level processor.
+               #
+               # Following are reversed versions which are consistently fast
+               # (about 3 milliseconds on a 1GHz-level processor).
+               #
+               $text = strrev( preg_replace( "/ s'([{$lc}]+)/", " s'\\1 \\1", strrev( $text ) ) );
+               $text = strrev( preg_replace( "/ 's([{$lc}]+)/", " s\\1", strrev( $text ) ) );
  
                 # Strip wiki '' and '''
                 $text = preg_replace( "/''[']*/", " ", $text );
  
                 # Strip wiki '' and '''
                 $text = preg_replace( "/''[']*/", " ", $text );
+               wfProfileOut( "$fname-regexps" );
+
+               wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) );
                 
                 
-               $sql = "REPLACE DELAYED INTO searchindex (si_page,si_title,si_text) VALUES ({$this->mId},'" .
-                 wfStrencode( Title::indexTitle( $this->mNamespace, $this->mTitle ) ) . "','" .
-                 wfStrencode( $text ) . "')";
-               wfQuery( $sql, DB_WRITE, "SearchUpdate::doUpdate" );
+               # Perform the actual update
+               $search->update($this->mId, Title::indexTitle( $this->mNamespace, $this->mTitle ),
+                               $text);
+               
+               wfProfileOut( $fname );
         }
  }
  
         }
  }
  
-?>
+/**
+ * Placeholder class
+ * @addtogroup Search
+ */
+class SearchUpdateMyISAM extends SearchUpdate {
+       # Inherits everything
+}
+
+