Don't look for pipes in the root node.
[lhc/web/wiklou.git] / includes / BacklinkCache.php
1 <?php
/**
 * Class for fetching backlink lists, approximate backlink counts and partitions.
 * Instances of this class should typically be fetched with $title->getBacklinkCache().
 *
 * Ideally you should only get your backlinks from here when you think there is some
 * advantage in caching them. Otherwise it's just a waste of memory.
 */
class BacklinkCache {
    /**
     * Partition data, indexed by table name and then by batch size. Each entry
     * has the shape array( 'numRows' => ..., 'batches' => array( ... ) ).
     */
    public $partitionCache = array();

    /** Unrestricted backlink query results, indexed by table name. */
    public $fullResultCache = array();

    /** Title object whose backlinks are being looked up. */
    public $title;

    /** Database object; injected with setDB() or lazily fetched by getDB(). */
    public $db;

    /** Expiry passed to $wgMemc->set() for partition data (presumably seconds). */
    const CACHE_EXPIRY = 3600;

    /**
     * Create a new BacklinkCache
     *
     * @param $title Title object to find the backlinks of
     */
    public function __construct( $title ) {
        $this->title = $title;
    }

    /**
     * Serialization handler. Leaves the database object out of the serialized
     * form to prevent failures after this class is deserialized from cache
     * with a dead DB connection.
     *
     * @return Array of property names to serialize
     */
    public function __sleep() {
        return array( 'partitionCache', 'fullResultCache', 'title' );
    }

    /**
     * Clear locally stored data
     */
    public function clear() {
        $this->partitionCache = array();
        $this->fullResultCache = array();
        // getDB() tests isset( $this->db ), so this forces a fresh lookup next time
        unset( $this->db );
    }

    /**
     * Set the Database object to use
     *
     * @param $db Database object used for subsequent queries
     */
    public function setDB( $db ) {
        $this->db = $db;
    }

    /**
     * Get the Database object, fetching one with wfGetDB( DB_SLAVE ) on first
     * use if none has been set.
     */
    protected function getDB() {
        if ( !isset( $this->db ) ) {
            $this->db = wfGetDB( DB_SLAVE );
        }

        return $this->db;
    }

    /**
     * Get the backlinks for a given table. Cached in process memory only.
     * Only the unrestricted query (no $startId and no $endId) is cached.
     *
     * @param $table String
     * @param $startId Integer or false
     * @param $endId Integer or false
     * @return TitleArray
     */
    public function getLinks( $table, $startId = false, $endId = false ) {
        // Resolve the prefix before opening the profiling section: getPrefix()
        // throws for an unknown table, which would otherwise leave the
        // wfProfileIn() call without a matching wfProfileOut().
        $fromField = $this->getPrefix( $table ) . '_from';

        wfProfileIn( __METHOD__ );

        if ( $startId || $endId ) {
            // Partial range, not cached
            wfDebug( __METHOD__ . ": from DB (uncacheable range)\n" );
            $conds = $this->getConditions( $table );

            // Use the from field in the condition rather than the joined page_id,
            // because databases are stupid and don't necessarily propagate indexes.
            if ( $startId ) {
                $conds[] = "$fromField >= " . intval( $startId );
            }

            if ( $endId ) {
                $conds[] = "$fromField <= " . intval( $endId );
            }

            $res = $this->selectLinks( $table, $conds, $fromField, __METHOD__ );
        } else {
            if ( !isset( $this->fullResultCache[$table] ) ) {
                wfDebug( __METHOD__ . ": from DB\n" );
                $this->fullResultCache[$table] = $this->selectLinks(
                    $table,
                    $this->getConditions( $table ),
                    $fromField,
                    __METHOD__
                );
            }

            $res = $this->fullResultCache[$table];
        }

        $ta = TitleArray::newFromResult( $res );

        wfProfileOut( __METHOD__ );
        return $ta;
    }

    /**
     * Run the backlink query shared by the cached and uncached paths of getLinks().
     *
     * @param $table String: the links table name
     * @param $conds Array: conditions, including the join on the page table
     * @param $fromField String: name of the "from" field, used for ordering
     * @param $fname String: caller name handed to the database layer
     * @return Result object returned by the database's select()
     */
    protected function selectLinks( $table, $conds, $fromField, $fname ) {
        return $this->getDB()->select(
            array( $table, 'page' ),
            array( 'page_namespace', 'page_title', 'page_id' ),
            $conds,
            $fname,
            array(
                'STRAIGHT_JOIN',
                'ORDER BY' => $fromField,
            )
        );
    }

    /**
     * Get the field name prefix for a given table
     *
     * @param $table String: the links table name
     * @return String
     * @throws MWException if the table is not a known links table
     */
    protected function getPrefix( $table ) {
        static $prefixes = array(
            'pagelinks' => 'pl',
            'imagelinks' => 'il',
            'categorylinks' => 'cl',
            'templatelinks' => 'tl',
            'redirect' => 'rd',
        );

        if ( isset( $prefixes[$table] ) ) {
            return $prefixes[$table];
        } else {
            throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
        }
    }

    /**
     * Get the SQL condition array for selecting backlinks, with a join on the page table
     *
     * @param $table String: the links table name
     * @return Array
     * @throws MWException if the table is not a known links table
     */
    protected function getConditions( $table ) {
        $prefix = $this->getPrefix( $table );

        switch ( $table ) {
            case 'pagelinks':
            case 'templatelinks':
            case 'redirect':
                // These tables identify the target by namespace and title
                $conds = array(
                    "{$prefix}_namespace" => $this->title->getNamespace(),
                    "{$prefix}_title" => $this->title->getDBkey(),
                    "page_id={$prefix}_from"
                );
                break;
            case 'imagelinks':
                $conds = array(
                    'il_to' => $this->title->getDBkey(),
                    'page_id=il_from'
                );
                break;
            case 'categorylinks':
                $conds = array(
                    'cl_to' => $this->title->getDBkey(),
                    'page_id=cl_from',
                );
                break;
            default:
                throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
        }

        return $conds;
    }

    /**
     * Get the approximate number of backlinks
     *
     * Uses, in order: the cached full result, any cached partition entry for
     * the table, and finally a fresh getLinks() query.
     *
     * @param $table String: the links table name
     * @return Integer
     */
    public function getNumLinks( $table ) {
        if ( isset( $this->fullResultCache[$table] ) ) {
            return $this->fullResultCache[$table]->numRows();
        }

        if ( !empty( $this->partitionCache[$table] ) ) {
            $entry = reset( $this->partitionCache[$table] );

            // Ignore anything that is not a complete entry (for instance a
            // placeholder left in previously serialized data).
            if ( is_array( $entry ) ) {
                return $entry['numRows'];
            }
        }

        $titleArray = $this->getLinks( $table );

        return $titleArray->count();
    }

    /**
     * Partition the backlinks into batches.
     * Returns an array giving the start and end of each range. The first batch has
     * a start of false, and the last batch has an end of false.
     *
     * The in-process cache entry is only written once the partition data has
     * been obtained successfully, so an exception part-way through cannot
     * leave an unusable entry behind.
     *
     * @param $table String: the links table name
     * @param $batchSize Integer
     * @return Array
     */
    public function partition( $table, $batchSize ) {
        global $wgMemc;

        // Try cache. isset() alone is true for a stored false, so also
        // require the entry to be an array.
        if ( isset( $this->partitionCache[$table][$batchSize] )
            && is_array( $this->partitionCache[$table][$batchSize] )
        ) {
            wfDebug( __METHOD__ . ": got from partition cache\n" );
            return $this->partitionCache[$table][$batchSize]['batches'];
        }

        // Try full result cache
        if ( isset( $this->fullResultCache[$table] ) ) {
            $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
            $this->partitionCache[$table][$batchSize] = $cacheEntry;
            wfDebug( __METHOD__ . ": got from full result cache\n" );

            return $cacheEntry['batches'];
        }

        // Try memcached
        $memcKey = wfMemcKey(
            'backlinks',
            md5( $this->title->getPrefixedDBkey() ),
            $table,
            $batchSize
        );

        $memcValue = $wgMemc->get( $memcKey );

        if ( is_array( $memcValue ) ) {
            $this->partitionCache[$table][$batchSize] = $memcValue;
            wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );

            return $memcValue['batches'];
        }

        // Fetch from database; getLinks() fills $this->fullResultCache[$table]
        $this->getLinks( $table );
        $cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
        $this->partitionCache[$table][$batchSize] = $cacheEntry;
        // Save to memcached
        $wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );

        wfDebug( __METHOD__ . ": got from database\n" );
        return $cacheEntry['batches'];
    }

    /**
     * Partition a DB result with backlinks in it into batches
     *
     * @param $res Result object providing numRows(), seek() and fetchObject()
     * @param $batchSize Integer: must be at least 1
     * @return Array with keys 'numRows' and 'batches'; each batch is
     *   array( $start, $end ) where false means "open ended"
     * @throws MWException on an invalid batch size or an out-of-order result
     */
    protected function partitionResult( $res, $batchSize ) {
        if ( $batchSize < 1 ) {
            // Would otherwise divide by zero below
            throw new MWException( __METHOD__ . ": Invalid batch size \"$batchSize\"" );
        }

        $batches = array();
        $numRows = $res->numRows();
        $numBatches = ceil( $numRows / $batchSize );

        // The first batch is open at the start. Every later batch starts at the
        // page_id found on the boundary row, and the batch before it ends one
        // below that id, so each boundary row only needs to be fetched once.
        $start = false;

        for ( $i = 1; $i <= $numBatches; $i++ ) {
            if ( $i < $numBatches ) {
                $rowNum = intval( $numRows * $i / $numBatches );
                $res->seek( $rowNum );
                $row = $res->fetchObject();
                $nextStart = $row->page_id;
                $end = $nextStart - 1;
            } else {
                // Last batch is open at the end
                $nextStart = false;
                $end = false;
            }

            # Sanity check order
            if ( $start && $end && $start > $end ) {
                throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
            }

            $batches[] = array( $start, $end );
            $start = $nextStart;
        }

        return array( 'numRows' => $numRows, 'batches' => $batches );
    }
}