* Reordered wiki table handling and __TOC__ extraction in the parser to better handle...

[lhc/web/wiklou.git] / maintenance / importUseModWiki.php
diff --git a/maintenance/importUseModWiki.php b/maintenance/importUseModWiki.php
index 7011bfb..15f5e44 100644
--- a/maintenance/importUseModWiki.php
+++ b/maintenance/importUseModWiki.php
@@ -27,7 +27,8 @@
   */
  
  if( php_sapi_name() != 'cli' ) {
-       die( "Please customize the settings and run me from the command line." );
+       echo "Please customize the settings and run me from the command line.";
+       die( -1 );
  }
  
  /** Set these correctly! */
@@ -56,7 +57,7 @@ importPages();
  function importPages()
  {
         global $wgRootDirectory;
-       
+
         $gt = '>';
         echo <<<END
  <?xml version="1.0" encoding="UTF-8" ?$gt
@@ -64,7 +65,8 @@ function importPages()
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/
                                 http://www.mediawiki.org/xml/export-0.1.xsd"
-           version="0.1">
+           version="0.1"
+           xml:lang="en">
  <!-- generated by importUseModWiki.php -->
  
  END;
@@ -85,7 +87,7 @@ END;
  
  function importPageDirectory( $dir, $prefix = "" )
  {
-       echo "\n<!-- Checking page directory $dir -->\n";
+       echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n";
         $mydir = opendir( $dir );
         while( $entry = readdir( $mydir ) ) {
                 if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
@@ -120,16 +122,17 @@ function useModFilename( $title ) {
  function fetchPage( $title )
  {
         global $FS,$FS1,$FS2,$FS3, $wgRootDirectory;
-       
+
         $fname = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
         if( !file_exists( $fname ) ) {
-               die( "Couldn't open file '$fname' for page '$title'.\n" );
+               echo "Couldn't open file '$fname' for page '$title'.\n";
+               die( -1 );
         }
-       
+
         $page = splitHash( $FS1, file_get_contents( $fname ) );
         $section = splitHash( $FS2, $page["text_default"] );
         $text = splitHash( $FS3, $section["data"] );
-       
+
         return array2object( array( "text" => $text["text"] , "summary" => $text["summary"] ,
                 "minor" => $text["minor"] , "ts" => $section["ts"] ,
                 "username" => $section["username"] , "host" => $section["host"] ) );
@@ -138,13 +141,13 @@ function fetchPage( $title )
  function fetchKeptPages( $title )
  {
         global $FS,$FS1,$FS2,$FS3, $wgRootDirectory, $wgTimezoneCorrection;
-       
+
         $fname = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
         if( !file_exists( $fname ) ) return array();
-       
+
         $keptlist = explode( $FS1, file_get_contents( $fname ) );
         array_shift( $keptlist ); # Drop the junk at beginning of file
-       
+
         $revisions = array();
         foreach( $keptlist as $rev ) {
                 $section = splitHash( $FS2, $rev );
@@ -154,7 +157,7 @@ function fetchKeptPages( $title )
                                 "minor" => $text["minor"] , "ts" => $section["ts"] ,
                                 "username" => $section["username"] , "host" => $section["host"] ) ) );
                 } else {
-                       echo "-- skipped a bad old revision\n";
+                       echo "<!-- skipped a bad old revision -->\n";
                 }
         }
         return $revisions;
@@ -196,12 +199,12 @@ function checkUserCache( $name, $host )
  function importPage( $title )
  {
         global $usercache;
-       
+
         echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";
         $page = fetchPage( $title );
  
         $newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );
-       
+
         $munged = mungeFormat( $page->text );
         if( $munged != $page->text ) {
                 /**
@@ -234,7 +237,7 @@ END;
         if(count( $revisions ) == 0 ) {
                 return $sql;
         }
-       
+
         foreach( $revisions as $rev ) {
                 $text      = xmlsafe( recodeText( $rev->text ) );
                 $minor     = ($rev->minor ? '<minor/>' : '');
@@ -242,13 +245,13 @@ END;
                 $username  = xmlsafe( recodeText( $username ) );
                 $timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );
                 $comment   = xmlsafe( recodeText( $rev->summary ) );
-               
+
                 $xml .= <<<END
                 <revision>
                         <timestamp>$timestamp</timestamp>
                         <contributor><username>$username</username></contributor>
-                       <comment>$comment</comment>
                         $minor
+                       <comment>$comment</comment>
                         <text>$text</text>
                 </revision>
  
@@ -302,13 +305,13 @@ function xmlsafe( $string ) {
          * XML output invalid, so be sure to strip them out.
          */
         $string = UtfNormal::cleanUp( $string );
-       
+
         $string = htmlspecialchars( $string );
         return $string;
  }
  
  function xmlCommentSafe( $text ) {
-       return str_replace( '--', '\\-\\-', xmlsafe( $text ) );
+       return str_replace( '--', '\\-\\-', xmlsafe( recodeText( $text ) ) );
  }
  
  
@@ -330,7 +333,7 @@ function mungeFormat( $text ) {
         $staged = preg_replace_callback(
                 '/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',
                 'nowikiPlaceholder', $text );
-       
+
         # This is probably not  100% correct, I'm just
         # glancing at the UseModWiki code.
         $upper   = "[A-Z]";
@@ -339,10 +342,10 @@ function mungeFormat( $text ) {
         $camel   = "(?:$upper+$lower+$upper+$any*)";
         $subpage = "(?:\\/$any+)";
         $substart = "(?:\\/$upper$any*)";
-       
+
         $munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/",
                 '[[$1]]', $staged );
-       
+
         $final = preg_replace( '/' . preg_quote( placeholder() ) . '/es',
                 'array_shift( $nowiki )', $munged );
         return $final;