var $history = WikiExporter::FULL;
var $fetchCount = 0;
var $prefetchCount = 0;
- var $lastTime = 0;
- var $pageCountLast = 0;
- var $revCountLast = 0;
var $prefetchCountLast = 0;
var $fetchCountLast = 0;
var $spawnRead = false;
var $spawnErr = false;
- var $ID = 0;
-
var $xmlwriterobj = false;
- # when we spend more than maxTimeAllowed seconds on this run, we continue
- # processing until we write out the next complete page, then save output file(s),
- # rename it/them and open new one(s)
+ // when we spend more than maxTimeAllowed seconds on this run, we continue
+ // processing until we write out the next complete page, then save output file(s),
+ // rename it/them and open new one(s)
var $maxTimeAllowed = 0; // 0 = no limit
var $timeExceeded = false;
var $firstPageWritten = false;
var $checkpointJustWritten = false;
var $checkpointFiles = array();
+ /**
+ * @var DatabaseBase
+ */
+ protected $db;
+
function initProgress( $history ) {
parent::initProgress();
- $this->ID = getmypid();
- $this->lastTime = $this->startTime;
$this->timeOfCheckpoint = $this->startTime;
}
function dump( $history, $text = WikiExporter::TEXT ) {
- # This shouldn't happen if on console... ;)
+ // This shouldn't happen if on console... ;)
header( 'Content-type: text/html; charset=UTF-8' );
- # Notice messages will foul up your XML output even if they're
- # relatively harmless.
+ // Notice messages will foul up your XML output even if they're
+ // relatively harmless.
if ( ini_get( 'display_errors' ) )
ini_set( 'display_errors', 'stderr' );
$this->egress = new ExportProgressFilter( $this->sink, $this );
- # it would be nice to do it in the constructor, oh well. need egress set
+ // it would be nice to do it in the constructor, oh well. need egress set
$this->finalOptionCheck();
- # we only want this so we know how to close a stream :-P
+ // we only want this so we know how to close a stream :-P
$this->xmlwriterobj = new XmlDumpWriter();
$input = fopen( $this->input, "rt" );
$result = $this->readDump( $input );
if ( WikiError::isError( $result ) ) {
- wfDie( $result->getMessage() );
+ throw new MWException( $result->getMessage() );
}
if ( $this->spawnProc ) {
*/
function showReport() {
if ( !$this->prefetch ) {
- return parent::showReport();
+ parent::showReport();
+ return;
}
if ( $this->reporting ) {
$now = wfTimestamp( TS_DB );
+ $nowts = wfTime();
$deltaAll = wfTime() - $this->startTime;
$deltaPart = wfTime() - $this->lastTime;
$this->pageCountPart = $this->pageCount - $this->pageCountLast;
$etats = wfTimestamp( TS_DB, intval( $eta ) );
if ( $this->fetchCount ) {
$fetchRate = 100.0 * $this->prefetchCount / $this->fetchCount;
- }
- else {
+ } else {
$fetchRate = '-';
}
$pageRate = $this->pageCount / $deltaAll;
if ( $deltaPart ) {
if ( $this->fetchCountLast ) {
$fetchRatePart = 100.0 * $this->prefetchCountLast / $this->fetchCountLast;
- }
- else {
+ } else {
$fetchRatePart = '-';
}
$pageRatePart = $this->pageCountPart / $deltaPart;
$pageRatePart = '-';
$revRatePart = '-';
}
- $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",-
+ $this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",
$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this->maxCount ) );
- $this->lastTime = $now;
- $this->partCountLast = $this->partCount;
+ $this->lastTime = $nowts;
$this->revCountLast = $this->revCount;
$this->prefetchCountLast = $this->prefetchCount;
$this->fetchCountLast = $this->fetchCount;
function checkIfTimeExceeded() {
if ( $this->maxTimeAllowed && ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed ) ) {
- return True;
+ return true;
}
- return False;
+ return false;
}
function finalOptionCheck() {
- if (($this->checkpointFiles && ! $this->maxTimeAllowed) ||
- ($this->maxTimeAllowed && !$this->checkpointFiles)) {
- wfDie("Options checkpointfile and maxtime must be specified together.\n");
+ if ( ( $this->checkpointFiles && ! $this->maxTimeAllowed ) ||
+ ( $this->maxTimeAllowed && !$this->checkpointFiles ) ) {
+ throw new MWException("Options checkpointfile and maxtime must be specified together.\n");
}
foreach ($this->checkpointFiles as $checkpointFile) {
- $count = substr_count ($checkpointFile,"%s");
- if (substr_count ($checkpointFile,"%s") != 2) {
- wfDie("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, fil
-e is $checkpointFile.\n");
+ $count = substr_count ( $checkpointFile,"%s" );
+ if ( $count != 2 ) {
+ throw new MWException("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n");
}
}
- $filenameList = $this->egress->getFilename();
- if (! is_array($filenameList)) {
- $filenameList = array( $filenameList );
- }
- if (count($filenameList) != count($this->checkpointFiles)) {
- wfDie("One checkpointfile must be specified for each output option, if maxtime is used.\n");
+ if ( $this->checkpointFiles ) {
+ $filenameList = (array)$this->egress->getFilenames();
+ if ( count( $filenameList ) != count( $this->checkpointFiles ) ) {
+ throw new MWException("One checkpointfile must be specified for each output option, if maxtime is used.\n");
+ }
}
}
$offset += strlen( $chunk );
} while ( $chunk !== false && !feof( $input ) );
if ($this->maxTimeAllowed) {
- $filenameList = $this->egress->getFilename();
- # we wrote some stuff after last checkpoint that needs renamed */
- if (! is_array($filenameList)) {
- $filenameList = array( $filenameList );
- }
+ $filenameList = (array)$this->egress->getFilenames();
+ // we wrote some stuff after the last checkpoint that needs to be renamed
if (file_exists($filenameList[0])) {
$newFilenames = array();
- $firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
- $lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
- for ($i =0; $i < count($filenameList); $i++) {
- $checkpointNameFilledIn = sprintf($this->checkpointFiles[$i], $firstPageID, $lastPageID);
+ // We might have written only the header and footer, with no
+ // pages or revisions at all (perhaps they were all deleted).
+ // There is no page with ID 0, so we use 0 for both IDs. The caller
+ // is responsible for deciding what to do with a file containing
+ // only the siteinfo information and the mw tags.
+ if (! $this->firstPageWritten) {
+ $firstPageID = str_pad(0,9,"0",STR_PAD_LEFT);
+ $lastPageID = str_pad(0,9,"0",STR_PAD_LEFT);
+ }
+ else {
+ $firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
+ $lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
+ }
+ for ( $i = 0; $i < count( $filenameList ); $i++ ) {
+ $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
$fileinfo = pathinfo($filenameList[$i]);
- $newFilenames[] = $fileinfo{'dirname'} . '/' . $checkpointNameFilledIn;
+ $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
}
- $this->egress->rename( $newFilenames );
+ $this->egress->closeAndRename( $newFilenames );
}
}
xml_parser_free( $parser );
}
private function doGetText( $id ) {
-
$id = intval( $id );
$this->failures = 0;
$ex = new MWException( "Graceful storage failure" );
$this->closeSpawn();
$this->openSpawn();
}
- $text = $this->getTextSpawned( $id );
+ $text = $this->getTextSpawned( $id );
} else {
- $text = $this->getTextDbSafe( $id );
+ $text = $this->getTextDbSafe( $id );
}
if ( $text === false ) {
$this->failures++;
$this->failedTextRetrievals++;
if ($this->failedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals) {
throw $ex;
- }
- else {
+ } else {
// would be nice to return something better to the caller someday,
// log what we know about the failure and about the revision
- return("");
+ return "";
}
} else {
$this->progress( "Error $this->failures " .
}
} else {
$this->failedTextRetrievals= 0;
- return( $text );
+ return $text;
}
}
-
+ return '';
}
/**
* Fetch a text revision from the database, retrying in case of failure.
* This may survive some transitory errors by reconnecting, but
* may not survive a long-term server outage.
+ *
+ * FIXME: Why is this using a loop when it then returns unconditionally?
*/
private function getTextDbSafe( $id ) {
while ( true ) {
/**
* May throw a database error if, say, the server dies during query.
+ * @param $id
+ * @return bool|string
*/
private function getTextDb( $id ) {
global $wgContLang;
function openSpawn() {
global $IP;
- $cmd = implode( " ",
- array_map( 'wfEscapeShellArg',
- array(
- $this->php,
- "$IP/maintenance/fetchText.php",
- '--wiki', wfWikiID() ) ) );
+ if ( file_exists( "$IP/../multiversion/MWScript.php" ) ) {
+ $cmd = implode( " ",
+ array_map( 'wfEscapeShellArg',
+ array(
+ $this->php,
+ "$IP/../multiversion/MWScript.php",
+ "fetchText.php",
+ '--wiki', wfWikiID() ) ) );
+ }
+ else {
+ $cmd = implode( " ",
+ array_map( 'wfEscapeShellArg',
+ array(
+ $this->php,
+ "$IP/maintenance/fetchText.php",
+ '--wiki', wfWikiID() ) ) );
+ }
$spec = array(
0 => array( "pipe", "r" ),
1 => array( "pipe", "w" ),
$this->lastPageWritten = trim($this->thisPage);
if ($this->timeExceeded) {
$this->egress->writeClosePage( $this->buffer );
- # nasty hack, we can't just write the chardata after the
- # page tag, it will include leading blanks from the next line
- $this->egress->sink->write("\n");
-
+ // Nasty hack: we can't just write the chardata after the
+ // page tag, because it would include leading blanks from the next line.
+ $this->egress->sink->write("\n");
+
$this->buffer = $this->xmlwriterobj->closeStream();
$this->egress->writeCloseStream( $this->buffer );
$this->buffer = "";
$this->thisPage = "";
- /* this could be more than one file if we had more than one output arg */
- $checkpointFilenames = array();
- $filenameList = $this->egress->getFilename();
+ // this could be more than one file if we had more than one output arg
- if (! is_array($filenameList)) {
- $filenameList = array( $filenameList );
- }
+ $filenameList = (array)$this->egress->getFilenames();
$newFilenames = array();
$firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
$lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
- for ($i =0; $i < count($filenameList); $i++) {
- $checkpointNameFilledIn = sprintf($this->checkpointFiles[$i], $firstPageID, $lastPageID);
+ for ( $i = 0; $i < count( $filenameList ); $i++ ) {
+ $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
$fileinfo = pathinfo($filenameList[$i]);
- $newFilenames[] = $fileinfo{'dirname'} . '/' . $checkpointNameFilledIn;
+ $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
}
$this->egress->closeRenameAndReopen( $newFilenames );
$this->buffer = $this->xmlwriterobj->openStream();
$this->thisPage .= $data;
}
}
- # have to skip the newline left over from closepagetag line of
- # end of checkpoint files. nasty hack!!
+ // We have to skip the newline left over from the closepagetag line at
+ // the end of checkpoint files. Nasty hack!
if ($this->checkpointJustWritten) {
if ($data[0] == "\n") {
$data = substr($data,1);
pressure on the database.
(Requires the XMLReader extension)
--maxtime=<minutes> Write out checkpoint file after this many minutes (writing
- out complete page, closing xml file properly, and opening new one
+ out complete page, closing xml file properly, and opening new one
with header). This option requires the checkpointfile option.
--checkpointfile=<filenamepattern> Use this string for checkpoint filenames,
- substituting first pageid written for the first %s (required) and the
+ substituting first pageid written for the first %s (required) and the
last pageid written for the second %s if it exists.
--quiet Don't dump status reports to stderr.
--report=n Report position and speed after every n pages processed.