Removed forward compat code with 5.1.0+ and modified code to use the new version
[lhc/web/wiklou.git] / maintenance / backupPrefetch.inc
1 <?php
2 /**
3 * Helper class for the --prefetch option of dumpTextPass.php
4 *
5 * @file
6 * @ingroup Maintenance
7 */
8
9 /**
10 * Readahead helper for making large MediaWiki data dumps;
11 * reads in a previous XML dump to sequentially prefetch text
12 * records already normalized and decompressed.
13 *
14 * This can save load on the external database servers, hopefully.
15 *
16 * Assumes that dumps will be recorded in the canonical order:
17 * - ascending by page_id
18 * - ascending by rev_id within each page
19 * - text contents are immutable and should not change once
20 * recorded, so the previous dump is a reliable source
21 *
22 * Requires the XMLReader PECL extension.
23 * @ingroup Maintenance
24 */
class BaseDump {
	/** @var XMLReader|null Streaming reader over the previous dump; created in the constructor */
	public $reader = null;

	/** @var bool True once the stream is exhausted, closed, or could not be opened */
	public $atEnd = false;

	/** @var bool True once the closing tag of the current page was hit while scanning it */
	public $atPageEnd = false;

	/** @var int ID of the page most recently read from the stream (0 before the first page) */
	public $lastPage = 0;

	/** @var int ID of the revision most recently read in the current page (0 if none yet) */
	public $lastRev = 0;

	/**
	 * Opens the previous dump for sequential reading.
	 *
	 * Uses __construct() rather than a method named after the class: a
	 * class-named method is no longer treated as a constructor by PHP 8,
	 * which would leave $reader unset.
	 *
	 * @param $infile String: path or URI of the XML dump to read from
	 */
	public function __construct( $infile ) {
		$this->reader = new XMLReader();
		if ( !$this->reader->open( $infile ) ) {
			// Nothing can be read from this source. Flag the stream as
			// exhausted so that prefetch() simply reports misses instead
			// of reading from a reader that has no data loaded.
			$this->atEnd = true;
		}
	}

	/**
	 * Attempts to fetch the text of a particular page revision
	 * from the dump stream. May return null if the page is
	 * unavailable.
	 *
	 * The stream is only ever read forwards, so requests are expected
	 * in ascending page and revision order; anything the reader has
	 * already moved past is reported as a miss.
	 *
	 * @param $page Integer: ID number of page to read
	 * @param $rev Integer: ID number of revision to read
	 * @return string or null
	 */
	public function prefetch( $page, $rev ) {
		$page = intval( $page );
		$rev = intval( $rev );

		// Advance page by page until we reach (or pass) the wanted page.
		while ( $this->lastPage < $page && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
			$this->nextPage();
		}
		if ( $this->lastPage > $page || $this->atEnd ) {
			$this->debug( "BaseDump::prefetch already past page $page looking for rev $rev [$this->lastPage, $this->lastRev]" );
			return null;
		}

		// Same idea within the page: advance revision by revision.
		while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
			$this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
			$this->nextRev();
		}
		if ( $this->lastRev === $rev && !$this->atEnd ) {
			$this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
			return $this->nextText();
		}

		$this->debug( "BaseDump::prefetch already past rev $rev on page $page [$this->lastPage, $this->lastRev]" );
		return null;
	}

	/**
	 * Sends a progress message to the debug log via wfDebug().
	 *
	 * @param $str String: message to log (a newline is appended)
	 */
	public function debug( $str ) {
		wfDebug( $str . "\n" );
	}

	/**
	 * Moves the reader to the next <page> element and records its ID.
	 * Marks the stream as finished when no further page exists.
	 *
	 * @access private
	 */
	public function nextPage() {
		if ( $this->skipTo( 'page', 'mediawiki' ) ) {
			if ( $this->skipTo( 'id' ) ) {
				$this->lastPage = intval( $this->nodeContents() );
				$this->lastRev = 0;
				$this->atPageEnd = false;
			}
		} elseif ( !$this->atEnd ) {
			// Stopped at </mediawiki>: there are no more pages, so release
			// the reader now. (On end-of-input skipTo() already closed it.)
			$this->close();
		}
	}

	/**
	 * Moves the reader to the next <revision> of the current page and
	 * records its ID. Sets $atPageEnd when the page has no further
	 * usable revision.
	 *
	 * @access private
	 */
	public function nextRev() {
		if ( $this->skipTo( 'revision' ) && $this->skipTo( 'id' ) ) {
			$this->lastRev = intval( $this->nodeContents() );
		} else {
			// Either there is no further revision, or the revision carried
			// no <id> and the search ran into </page>. In both cases the
			// reader must not keep scanning, or it would wander into the
			// next page while $lastPage still names this one.
			$this->atPageEnd = true;
		}
	}

	/**
	 * Reads the <text> of the revision the reader is positioned in.
	 *
	 * @return string or null if the revision has no <text> element or
	 *  the stream ended before the text was complete
	 * @access private
	 */
	public function nextText() {
		if ( !$this->skipTo( 'text' ) ) {
			if ( !$this->atEnd ) {
				// skipTo() stopped at </page>. Collecting node contents
				// from here would return text belonging to the next page.
				$this->atPageEnd = true;
			}
			return null;
		}
		$text = $this->nodeContents();
		// A null here means the stream ended mid-element; report a miss
		// rather than passing off an empty string as the revision text.
		return $text === null ? null : strval( $text );
	}

	/**
	 * Reads forward until an opening tag called $name is found.
	 *
	 * @param $name String: element name to stop at
	 * @param $parent String: element whose closing tag aborts the search
	 * @return bool|null true if positioned on <$name>; false if the
	 *  stream was already finished or </$parent> came first; null if
	 *  the input ran out (the reader is closed in that case)
	 * @access private
	 */
	public function skipTo( $name, $parent = 'page' ) {
		if ( $this->atEnd ) {
			return false;
		}
		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::ELEMENT &&
				$this->reader->name == $name ) {
				return true;
			}
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == $parent ) {
				$this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
				return false;
			}
		}
		return $this->close();
	}

	/**
	 * Shouldn't something like this be built-in to XMLReader?
	 * Fetches text contents of the current element, assuming
	 * no sub-elements or such scary things.
	 *
	 * @return String, or null if the stream is finished or ends
	 *  before the element is closed
	 * @access private
	 */
	public function nodeContents() {
		if ( $this->atEnd ) {
			return null;
		}
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				// Plain XMLReader::WHITESPACE nodes are intentionally
				// not collected; only text and significant whitespace are.
				case XMLReader::TEXT:
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}
		return $this->close();
	}

	/**
	 * Closes the reader and marks the stream as finished.
	 *
	 * @return null always, so callers can write "return $this->close();"
	 * @access private
	 */
	public function close() {
		$this->reader->close();
		$this->atEnd = true;
		return null;
	}
}