/** @var DumpOutput */
public $sink;
+ /** @var XmlDumpWriter Produces the page and revision output that is passed on to $sink */
+ private $writer;
+
/**
- * Returns the export schema version.
+ * Returns the default export schema version, as defined by $wgXmlDumpSchemaVersion.
+ *
+ * The constructor hands this value to its XmlDumpWriter; setSchemaVersion()
+ * selects a different version for a single exporter instance.
* @return string
*/
public static function schemaVersion() {
- return "0.10";
+ global $wgXmlDumpSchemaVersion;
+ return $wgXmlDumpSchemaVersion;
}
/**
* - limit: maximum number of rows to return
* - dir: "asc" or "desc" timestamp order
* @param int $text One of WikiExporter::TEXT or WikiExporter::STUB
+ * @param null|array $limitNamespaces List of namespace numbers to limit
+ * results to; null applies no namespace filter
*/
- function __construct( $db, $history = self::CURRENT, $text = self::TEXT ) {
+ function __construct( $db, $history = self::CURRENT, $text = self::TEXT,
+ $limitNamespaces = null
+ ) {
$this->db = $db;
$this->history = $history;
- $this->writer = new XmlDumpWriter();
+ $this->writer = new XmlDumpWriter( $text, self::schemaVersion() );
$this->sink = new DumpOutput();
$this->text = $text;
+ $this->limitNamespaces = $limitNamespaces;
+ }
+
+ /**
+ * Selects the schema version used for the XML generated by this exporter.
+ *
+ * Replaces the current XmlDumpWriter with a new one built from $this->text
+ * and the requested version.
+ *
+ * @param string $schemaVersion Which schema version the generated XML should comply with.
+ * One of the values from self::$supportedSchemas, using the XML_DUMP_SCHEMA_VERSION_XX
+ * constants.
+ * NOTE(review): $supportedSchemas is not declared in the visible part of this
+ * class - confirm that self:: is the right owner.
+ */
+ public function setSchemaVersion( $schemaVersion ) {
+ $this->writer = new XmlDumpWriter( $this->text, $schemaVersion );
}
/**
foreach ( $res as $row ) {
$this->author_list .= "<contributor>" .
"<username>" .
- htmlentities( $row->rev_user_text ) .
+ htmlspecialchars( $row->rev_user_text ) .
"</username>" .
"<id>" .
- $row->rev_user .
+ ( (int)$row->rev_user ) .
"</id>" .
"</contributor>";
}
}
$revOpts = [ 'page' ];
- if ( $this->text != self::STUB ) {
- // TODO: remove the text and make XmlDumpWriter use a RevisionStore instead! (T198706)
- $revOpts[] = 'text';
- }
+
$revQuery = Revision::getQueryInfo( $revOpts );
// We want page primary rather than revision
];
unset( $join['page'] );
- // TODO: remove rev_text_id and make XmlDumpWriter use a RevisionStore instead! (T198706)
- $fields = array_merge( $revQuery['fields'], [ 'page_restrictions, rev_text_id' ] );
+ $fields = $revQuery['fields'];
+ $fields[] = 'page_restrictions';
+
+ if ( $this->text != self::STUB ) {
+ $fields['_load_content'] = '1';
+ }
$conds = [];
if ( $cond !== '' ) {
$opts[] = 'STRAIGHT_JOIN';
$opts['USE INDEX']['revision'] = 'rev_page_id';
unset( $join['revision'] );
- $join['page'] = [ 'INNER JOIN', 'rev_page=page_id' ];
+ $join['page'] = [ 'JOIN', 'rev_page=page_id' ];
}
} elseif ( $this->history & self::CURRENT ) {
# Latest revision dumps...
if ( $this->list_authors && $cond != '' ) { // List authors, if so desired
$this->do_list_authors( $cond );
}
- $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' ];
+ $join['revision'] = [ 'JOIN', 'page_id=rev_page AND page_latest=rev_id' ];
} elseif ( $this->history & self::STABLE ) {
# "Stable" revision dumps...
# Default JOIN, to be overridden...
- $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' ];
+ $join['revision'] = [ 'JOIN', 'page_id=rev_page AND page_latest=rev_id' ];
# One, and only one hook should set this, and return false
if ( Hooks::run( 'WikiExporter::dumpStableQuery', [ &$tables, &$opts, &$join ] ) ) {
throw new MWException( __METHOD__ . " given invalid history dump type." );
$queryConds[] = 'rev_page>' . intval( $revPage ) . ' OR (rev_page=' .
intval( $revPage ) . ' AND rev_id' . $op . intval( $revId ) . ')';
- # Do the query!
+ # Do the query and process any results, remembering max ids for the next iteration.
$result = $this->db->select(
$tables,
$fields,
$opts,
$join
);
- # Output dump results, get new max ids.
- $lastRow = $this->outputPageStream( $result, $lastRow );
-
- if ( !$result->numRows() || !$lastRow ) {
- $done = true;
- } else {
+ if ( $result->numRows() > 0 ) {
+ $lastRow = $this->outputPageStreamBatch( $result, $lastRow );
$rowCount += $result->numRows();
$revPage = $lastRow->rev_page;
$revId = $lastRow->rev_id;
+ } else {
+ $done = true;
+ }
+
+ // If we are finished, close off final page element (if any).
+ if ( $done && $lastRow ) {
+ $this->finishPageStreamOutput( $lastRow );
}
}
}
* The result set should be sorted/grouped by page to avoid duplicate
* page records in the output.
*
+ * When $this->limitNamespaces is set, rows whose page_namespace is not in that
+ * list produce no output but still replace $lastRow. If $results yields no
+ * rows, the $lastRow that was passed in is returned unchanged.
+ * NOTE(review): because skipped rows become $lastRow, the close-page branch can
+ * run for a row whose page was never opened - confirm this is intended.
+ *
- * @param ResultWrapper $resultset
+ * @param ResultWrapper $results
* @param object $lastRow the last row output from the previous call (or null if none)
* @return object the last row processed
*/
- protected function outputPageStream( $resultset, $lastRow ) {
- if ( $resultset->numRows() ) {
- foreach ( $resultset as $row ) {
- if ( $lastRow === null ||
- $lastRow->page_namespace != $row->page_namespace ||
- $lastRow->page_title != $row->page_title ) {
- if ( $lastRow !== null ) {
- $output = '';
- if ( $this->dumpUploads ) {
- $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
- }
- $output .= $this->writer->closePage();
- $this->sink->writeClosePage( $output );
- }
- $output = $this->writer->openPage( $row );
- $this->sink->writeOpenPage( $row, $output );
- }
- $output = $this->writer->writeRevision( $row );
- $this->sink->writeRevision( $row, $output );
+ protected function outputPageStreamBatch( $results, $lastRow ) {
+ foreach ( $results as $row ) {
+ if ( $this->limitNamespaces &&
+ !in_array( $row->page_namespace, $this->limitNamespaces ) ) {
$lastRow = $row;
+ continue;
}
- } elseif ( $lastRow !== null ) {
- // Empty resultset means done with all batches Close off final page element (if any).
- $output = '';
- if ( $this->dumpUploads ) {
- $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
+ if ( $lastRow === null ||
+ $lastRow->page_namespace !== $row->page_namespace ||
+ $lastRow->page_title !== $row->page_title ) {
+ if ( $lastRow !== null ) {
+ $output = '';
+ if ( $this->dumpUploads ) {
+ $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
+ }
+ $output .= $this->writer->closePage();
+ $this->sink->writeClosePage( $output );
+ }
+ $output = $this->writer->openPage( $row );
+ $this->sink->writeOpenPage( $row, $output );
}
- $output .= $this->author_list;
- $output .= $this->writer->closePage();
- $this->sink->writeClosePage( $output );
- $lastRow = null;
+ $output = $this->writer->writeRevision( $row );
+ $this->sink->writeRevision( $row, $output );
+ $lastRow = $row;
}
return $lastRow;
}
+ /**
+ * Final page stream output, after all batches are complete.
+ *
+ * Builds the closing output for the last page: the result of writeUploads()
+ * for $lastRow when $this->dumpUploads is set, followed by the collected
+ * author list and the writer's closePage() output, and passes it to the sink
+ * in a single writeClosePage() call.
+ *
+ * @param object $lastRow the last row output from the last batch (or null if none)
+ * NOTE(review): the caller visible in this file only invokes this method
+ * when $lastRow is non-empty - confirm whether null is ever passed.
+ */
+ protected function finishPageStreamOutput( $lastRow ) {
+ $output = '';
+ if ( $this->dumpUploads ) {
+ $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
+ }
+ $output .= $this->author_list;
+ $output .= $this->writer->closePage();
+ $this->sink->writeClosePage( $output );
+ }
+
/**
* @param ResultWrapper $resultset
* @return int the log_id value of the last item output, or null if none