Bring the parser test expectation up to date.

[lhc/web/wiklou.git] / maintenance / importUseModWiki.php
diff --git a/maintenance/importUseModWiki.php b/maintenance/importUseModWiki.php

index 46eed2b..e8463e2 100644 (file)
--- a/maintenance/importUseModWiki.php
+++ b/maintenance/importUseModWiki.php
@@ -22,12 +22,13 @@
   * 2005-03-14
   *
   * @todo document
- * @package MediaWiki
- * @subpackage Maintenance
+ * @file
+ * @ingroup Maintenance
   */
  
-if( php_sapi_name() != 'cli' ) {
-       die( "Please customize the settings and run me from the command line." );
+if ( php_sapi_name() != 'cli' ) {
+       echo "Please customize the settings and run me from the command line.";
+       die( -1 );
  }
  
  /** Set these correctly! */
@@ -40,12 +41,12 @@ $wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki";
  /* globals */
  $wgFieldSeparator = "\xb3"; # Some wikis may use different char
         $FS = $wgFieldSeparator ;
-       $FS1 = $FS."1" ;
-       $FS2 = $FS."2" ;
-       $FS3 = $FS."3" ;
+       $FS1 = $FS . "1" ;
+       $FS2 = $FS . "2" ;
+       $FS3 = $FS . "3" ;
  
  # Unicode sanitization tools
-require_once( '../includes/normal/UtfNormal.php' );
+require_once( dirname( dirname( __FILE__ ) ) . '/includes/normal/UtfNormal.php' );
  
  $usercache = array();
  
@@ -56,9 +57,9 @@ importPages();
  function importPages()
  {
         global $wgRootDirectory;
-       
+
         $gt = '>';
-       echo <<<END
+       echo <<<XML
  <?xml version="1.0" encoding="UTF-8" ?$gt
  <mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
@@ -68,32 +69,33 @@ function importPages()
             xml:lang="en">
  <!-- generated by importUseModWiki.php -->
  
-END;
+XML;
         $letters = array(
                 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I',
                 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
                 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'other' );
-       foreach( $letters as $letter ) {
+       foreach ( $letters as $letter ) {
                 $dir = "$wgRootDirectory/page/$letter";
-               if( is_dir( $dir ) )
+               if ( is_dir( $dir ) )
                         importPageDirectory( $dir );
         }
-       echo <<<END
+       echo <<<XML
  </mediawiki>
  
-END;
+XML;
  }
  
  function importPageDirectory( $dir, $prefix = "" )
  {
         echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n";
         $mydir = opendir( $dir );
-       while( $entry = readdir( $mydir ) ) {
-               if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
+       while ( $entry = readdir( $mydir ) ) {
+               $m = array();
+               if ( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
                         echo importPage( $prefix . $m[1] );
                 } else {
-                       if( is_dir( "$dir/$entry" ) ) {
-                               if( $entry != '.' && $entry != '..' ) {
+                       if ( is_dir( "$dir/$entry" ) ) {
+                               if ( $entry != '.' && $entry != '..' ) {
                                         importPageDirectory( "$dir/$entry", "$entry/" );
                                 }
                         } else {
@@ -112,7 +114,7 @@ function importPageDirectory( $dir, $prefix = "" )
  
  function useModFilename( $title ) {
         $c = substr( $title, 0, 1 );
-       if(preg_match( '/[A-Z]/i', $c ) ) {
+       if ( preg_match( '/[A-Z]/i', $c ) ) {
                 return strtoupper( $c ) . "/$title";
         }
         return "other/$title";
@@ -120,17 +122,18 @@ function useModFilename( $title ) {
  
  function fetchPage( $title )
  {
-       global $FS,$FS1,$FS2,$FS3, $wgRootDirectory;
-       
+       global $FS1, $FS2, $FS3, $wgRootDirectory;
+
         $fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
-       if( !file_exists( $fname ) ) {
-               die( "Couldn't open file '$fname' for page '$title'.\n" );
+       if ( !file_exists( $fname ) ) {
+               echo "Couldn't open file '$fname' for page '$title'.\n";
+               die( -1 );
         }
-       
+
         $page = splitHash( $FS1, file_get_contents( $fname ) );
         $section = splitHash( $FS2, $page["text_default"] );
         $text = splitHash( $FS3, $section["data"] );
-       
+
         return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] ,
                 "minor" => $text["minor"] , "ts" => $section["ts"] ,
                 "username" => $section["username"] , "host" => $section["host"] ) );
@@ -138,19 +141,19 @@ function fetchPage( $title )
  
  function fetchKeptPages( $title )
  {
-       global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection;
-       
+       global $FS1, $FS2, $FS3, $wgRootDirectory;
+
         $fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
-       if( !file_exists( $fname ) ) return array();
-       
+       if ( !file_exists( $fname ) ) return array();
+
         $keptlist = explode( $FS1, file_get_contents( $fname ) );
         array_shift( $keptlist ); # Drop the junk at beginning of file
-       
+
         $revisions = array();
-       foreach( $keptlist as $rev ) {
+       foreach ( $keptlist as $rev ) {
                 $section = splitHash( $FS2, $rev );
                 $text = splitHash( $FS3, $section["data"] );
-               if ( $text["text"] && $text["minor"] != "" && ( $section["ts"]*1 > 0 ) ) {
+               if ( $text["text"] && $text["minor"] != "" && ( $section["ts"] * 1 > 0 ) ) {
                         array_push( $revisions, array2object( array ( "text" => $text["text"] , "summary" => $text["summary"] ,
                                 "minor" => $text["minor"] , "ts" => $section["ts"] ,
                                 "username" => $section["username"] , "host" => $section["host"] ) ) );
@@ -164,7 +167,7 @@ function fetchKeptPages( $title )
  function splitHash ( $sep , $str ) {
         $temp = explode ( $sep , $str ) ;
         $ret = array () ;
-       for ( $i = 0; $i+1 < count ( $temp ) ; $i++ ) {
+       for ( $i = 0; $i + 1 < count ( $temp ) ; $i++ ) {
                 $ret[$temp[$i]] = $temp[++$i] ;
                 }
         return $ret ;
@@ -179,8 +182,8 @@ function checkUserCache( $name, $host )
  {
         global $usercache;
  
-       if( $name ) {
-               if( in_array( $name, $usercache ) ) {
+       if ( $name ) {
+               if ( in_array( $name, $usercache ) ) {
                         $userid = $usercache[$name];
                 } else {
                         # If we haven't imported user accounts
@@ -197,14 +200,14 @@ function checkUserCache( $name, $host )
  function importPage( $title )
  {
         global $usercache;
-       
+
         echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";
         $page = fetchPage( $title );
  
         $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );
-       
+
         $munged = mungeFormat( $page->text );
-       if( $munged != $page->text ) {
+       if ( $munged != $page->text ) {
                 /**
                  * Save a *new* revision with the conversion, and put the
                  * previous last version into the history.
@@ -224,27 +227,27 @@ function importPage( $title )
                  */
                 $revisions = array( $page );
         }
-       $xml = <<<END
+       $xml = <<<XML
         <page>
                 <title>$newtitle</title>
  
-END;
+XML;
  
         # History
         $revisions = array_merge( $revisions, fetchKeptPages( $title ) );
-       if(count( $revisions ) == 0 ) {
-               return $sql;
+       if ( count( $revisions ) == 0 ) {
+               return NULL; // Was "$sql", which does not appear to be defined.
         }
-       
-       foreach( $revisions as $rev ) {
+
+       foreach ( $revisions as $rev ) {
                 $text      = xmlsafe( recodeText( $rev->text ) );
-               $minor     = ($rev->minor ? '<minor/>' : '');
-               list( $userid, $username ) = checkUserCache( $rev->username, $rev->host );
+               $minor     = ( $rev->minor ? '<minor/>' : '' );
+               list( /* $userid */ , $username ) = checkUserCache( $rev->username, $rev->host );
                 $username  = xmlsafe( recodeText( $username ) );
                 $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );
                 $comment   = xmlsafe( recodeText( $rev->summary ) );
-               
-               $xml .= <<<END
+
+               $xml .= <<<XML
                 <revision>
                         <timestamp>$timestamp</timestamp>
                         <contributor><username>$username</username></contributor>
@@ -253,7 +256,7 @@ END;
                         <text>$text</text>
                 </revision>
  
-END;
+XML;
         }
         $xml .= "</page>\n\n";
         return $xml;
@@ -269,22 +272,22 @@ function recodeText( $string ) {
         return $string;
  }
  
-function wfUtf8Sequence($codepoint) {
-       if($codepoint <     0x80) return chr($codepoint);
-       if($codepoint <    0x800) return chr($codepoint >>  6 & 0x3f | 0xc0) .
-                                     chr($codepoint       & 0x3f | 0x80);
-    if($codepoint <  0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
-                                     chr($codepoint >>  6 & 0x3f | 0x80) .
-                                     chr($codepoint       & 0x3f | 0x80);
-       if($codepoint < 0x100000) return chr($codepoint >> 18 & 0x07 | 0xf0) . # Double-check this
-                                        chr($codepoint >> 12 & 0x3f | 0x80) .
-                                     chr($codepoint >>  6 & 0x3f | 0x80) .
-                                     chr($codepoint       & 0x3f | 0x80);
+function wfUtf8Sequence( $codepoint ) {
+       if ( $codepoint <     0x80 ) return chr( $codepoint );
+       if ( $codepoint <    0x800 ) return chr( $codepoint >>  6 & 0x3f | 0xc0 ) .
+                                     chr( $codepoint       & 0x3f | 0x80 );
+    if ( $codepoint <  0x10000 ) return chr( $codepoint >> 12 & 0x0f | 0xe0 ) .
+                                     chr( $codepoint >>  6 & 0x3f | 0x80 ) .
+                                     chr( $codepoint       & 0x3f | 0x80 );
+       if ( $codepoint < 0x100000 ) return chr( $codepoint >> 18 & 0x07 | 0xf0 ) . # Double-check this
+                                        chr( $codepoint >> 12 & 0x3f | 0x80 ) .
+                                     chr( $codepoint >>  6 & 0x3f | 0x80 ) .
+                                     chr( $codepoint       & 0x3f | 0x80 );
         # Doesn't yet handle outside the BMP
         return "&#$codepoint;";
  }
  
-function wfMungeToUtf8($string) {
+function wfMungeToUtf8( $string ) {
         $string = preg_replace ( '/&#([0-9]+);/e', 'wfUtf8Sequence($1)', $string );
         $string = preg_replace ( '/&#x([0-9a-f]+);/ie', 'wfUtf8Sequence(0x$1)', $string );
         # Should also do named entities here
@@ -292,7 +295,7 @@ function wfMungeToUtf8($string) {
  }
  
  function timestamp2ISO8601( $ts ) {
-       #2003-08-05T18:30:02Z
+       # 2003-08-05T18:30:02Z
         return gmdate( 'Y-m-d', $ts ) . 'T' . gmdate( 'H:i:s', $ts ) . 'Z';
  }
  
@@ -303,7 +306,7 @@ function xmlsafe( $string ) {
          * XML output invalid, so be sure to strip them out.
          */
         $string = UtfNormal::cleanUp( $string );
-       
+
         $string = htmlspecialchars( $string );
         return $string;
  }
@@ -315,7 +318,7 @@ function xmlCommentSafe( $text ) {
  
  function array2object( $arr ) {
         $o = (object)0;
-       foreach( $arr as $x => $y ) {
+       foreach ( $arr as $x => $y ) {
                 $o->$x = $y;
         }
         return $o;
@@ -331,7 +334,7 @@ function mungeFormat( $text ) {
         $staged = preg_replace_callback(
                 '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',
                 'nowikiPlaceholder', $text );
-       
+
         # This is probably not  100% correct, I'm just
         # glancing at the UseModWiki code.
         $upper   = "[A-Z]";
@@ -340,10 +343,10 @@ function mungeFormat( $text ) {
         $camel   = "(?:$upper+$lower+$upper+$any*)";
         $subpage = "(?:\\/$any+)";
         $substart = "(?:\\/$upper$any*)";
-       
+
         $munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/",
                 '[[$1]]', $staged );
-       
+
         $final = preg_replace( '/' . preg_quote( placeholder() ) . '/es',
                 'array_shift( $nowiki )', $munged );
         return $final;
@@ -360,4 +363,4 @@ function nowikiPlaceholder( $matches ) {
         return placeholder();
  }
  
-?>
+