API: Fix list=deletedrevs paging bug pointed out by Splarka on IRC
[lhc/web/wiklou.git] / includes / filerepo / FSRepo.php
1 <?php
2
3 /**
4 * A repository for files accessible via the local filesystem. Does not support
5 * database access or registration.
6 * @ingroup FileRepo
7 */
8 class FSRepo extends FileRepo {
/** Local path of the public zone root; read from $info['directory'] in the constructor */
public $directory;
/** Local path of the deleted zone root, or false when no deletion archive is configured */
public $deletedDir;
/** Public root URL of the repository; read from $info['url'] in the constructor */
public $url;
/** Number of leading key characters turned into directory levels by getDeletedHashPath() */
public $deletedHashLevels;
/** Callback pair; not used in this file, presumably consumed by the parent class -- TODO confirm */
public $fileFactory = array( 'UnregisteredLocalFile', 'newFromTitle' );
/** Not used in this file; presumably disables an old-version file factory in the parent -- TODO confirm */
public $oldFileFactory = false;
/** Selects the callback returned by getErrorCleanupFunction(); 'simple' picks simpleClean() */
public $pathDisclosureProtection = 'simple';
/**
 * Replacement map built lazily by simpleClean(). Declared explicitly so the
 * property is not created dynamically on first use; stays null until then.
 */
public $simpleCleanPairs;
13
/**
 * Configure the repository from a settings array.
 *
 * @param array $info Settings. 'directory' and 'url' are read without any
 *   fallback. Optional keys: 'hashLevels' (default 2), 'deletedHashLevels'
 *   (defaults to the hashLevels value) and 'deletedDir' (default false).
 */
function __construct( $info ) {
	parent::__construct( $info );

	// Mandatory keys
	$this->directory = $info['directory'];
	$this->url = $info['url'];

	// Optional keys. hashLevels must be settled first because it is the
	// fallback for deletedHashLevels.
	if ( isset( $info['hashLevels'] ) ) {
		$this->hashLevels = $info['hashLevels'];
	} else {
		$this->hashLevels = 2;
	}

	if ( isset( $info['deletedHashLevels'] ) ) {
		$this->deletedHashLevels = $info['deletedHashLevels'];
	} else {
		$this->deletedHashLevels = $this->hashLevels;
	}

	if ( isset( $info['deletedDir'] ) ) {
		$this->deletedDir = $info['deletedDir'];
	} else {
		$this->deletedDir = false;
	}
}
27
/**
 * Return the local path of the repository's public root directory.
 *
 * @return string The configured 'directory' setting
 */
function getRootDirectory() {
	$publicRoot = $this->directory;
	return $publicRoot;
}
34
/**
 * Return the public root URL of the repository.
 *
 * @return string The configured 'url' setting
 */
function getRootUrl() {
	$publicUrl = $this->url;
	return $publicUrl;
}
41
/**
 * Tell whether the repository stores files in a multi-level directory structure.
 *
 * @return boolean True when hashLevels is truthy (non-zero)
 */
function isHashed() {
	return $this->hashLevels ? true : false;
}
48
/**
 * Map one of the three basic zones to its local directory.
 *
 * @param string $zone 'public', 'temp' or 'deleted'
 * @return mixed Directory path; the deletedDir setting (possibly false) for
 *   'deleted'; false for any other zone name
 */
function getZonePath( $zone ) {
	// Loose (==) comparisons are used on purpose: they match switch semantics
	if ( $zone == 'public' ) {
		return $this->directory;
	}
	if ( $zone == 'temp' ) {
		return $this->directory . '/temp';
	}
	if ( $zone == 'deleted' ) {
		return $this->deletedDir;
	}
	return false;
}
64
/**
 * Map one of the three basic zones to its public URL.
 *
 * @param string $zone 'public', 'temp' or 'deleted'
 * @return mixed URL string for 'public' and 'temp'; false for 'deleted'
 *   (it has no public URL) and for any other zone name
 */
function getZoneUrl( $zone ) {
	// Loose (==) comparisons are used on purpose: they match switch semantics
	if ( $zone == 'public' ) {
		return $this->url;
	}
	if ( $zone == 'temp' ) {
		return $this->url . '/temp';
	}
	// 'deleted' and unknown zones alike have nothing to return
	return false;
}
80
/**
 * Build a URL for this repository using the private mwrepo protocol.
 *
 * @param mixed $suffix Unencoded suffix to append, or false for none. The
 *   whole suffix is passed through rawurlencode(), so any slash inside it
 *   is encoded as well.
 * @return string 'mwrepo://<name>' optionally followed by '/<encoded suffix>'
 */
function getVirtualUrl( $suffix = false ) {
	$base = 'mwrepo://' . $this->name;
	if ( $suffix === false ) {
		return $base;
	}
	return $base . '/' . rawurlencode( $suffix );
}
93
/**
 * Get the local path corresponding to a virtual URL.
 *
 * The URL must have the form mwrepo://<repo>/<zone>/<relative path>, where
 * <repo> is this repository's name and <zone> is a zone known to getZonePath().
 *
 * @param string $url Virtual URL to resolve
 * @return string Zone directory, a slash, and the URL-decoded relative path
 * @throws MWException If the protocol is not mwrepo://, the URL has fewer
 *   than three components, the repo name differs from this repo, or the
 *   zone has no local path
 */
function resolveVirtualUrl( $url ) {
	if ( substr( $url, 0, 9 ) != 'mwrepo://' ) {
		throw new MWException( __METHOD__.': unknown protocol' );
	}

	// Limit the split to three parts so slashes inside the relative path survive
	$bits = explode( '/', substr( $url, 9 ), 3 );
	if ( count( $bits ) != 3 ) {
		throw new MWException( __METHOD__.": invalid mwrepo URL: $url" );
	}
	list( $repo, $zone, $rel ) = $bits;
	if ( $repo !== $this->name ) {
		throw new MWException( __METHOD__.": fetching from a foreign repo is not supported" );
	}
	$base = $this->getZonePath( $zone );
	if ( !$base ) {
		// Covers unknown zones and a 'deleted' zone with no directory configured
		throw new MWException( __METHOD__.": invalid zone: $zone" );
	}
	return $base . '/' . rawurldecode( $rel );
}
116
/**
 * Store a batch of files.
 *
 * Runs in two passes. The first validates every triplet, creates missing
 * destination directories, resolves virtual source URLs and records fatal
 * errors for missing sources or conflicting destinations. If any fatal
 * error was recorded, the status is returned before anything is written.
 * The second pass copies or moves each file and sets its mode to 0644.
 *
 * @param array $triplets (src,zone,dest) triplets as per store()
 * @param integer $flags Bitwise combination of the following flags:
 *     self::DELETE_SOURCE     Move the source with rename() instead of copying it
 *     self::OVERWRITE         Overwrite an existing destination file instead of failing
 *     self::OVERWRITE_SAME    Accept an existing destination if its SHA-1 hash
 *                             equals that of the source
 * @return mixed Status object as created by newGood() / newFatal()
 * @throws MWException If a zone has no local path or a destination name
 *   fails validateFilename()
 */
function storeBatch( $triplets, $flags = 0 ) {
	if ( !wfMkdirParents( $this->directory ) ) {
		return $this->newFatal( 'upload_directory_missing', $this->directory );
	}
	if ( !is_writable( $this->directory ) ) {
		return $this->newFatal( 'upload_directory_read_only', $this->directory );
	}
	$status = $this->newGood();

	// Pass 1: validation only, no file is copied or moved here
	foreach ( $triplets as $i => $triplet ) {
		list( $srcPath, $dstZone, $dstRel ) = $triplet;

		$root = $this->getZonePath( $dstZone );
		if ( !$root ) {
			throw new MWException( "Invalid zone: $dstZone" );
		}
		if ( !$this->validateFilename( $dstRel ) ) {
			throw new MWException( 'Validation error in $dstRel' );
		}
		$dstPath = "$root/$dstRel";
		$dstDir = dirname( $dstPath );

		if ( !is_dir( $dstDir ) ) {
			if ( !wfMkdirParents( $dstDir ) ) {
				return $this->newFatal( 'directorycreateerror', $dstDir );
			}
			// Newly created directories in the deleted zone get web-access protection
			if ( $dstZone == 'deleted' ) {
				$this->initDeletedDir( $dstDir );
			}
		}

		if ( self::isVirtualUrl( $srcPath ) ) {
			// Write the resolved path back so the second pass sees a local path
			$srcPath = $triplets[$i][0] = $this->resolveVirtualUrl( $srcPath );
		}
		if ( !is_file( $srcPath ) ) {
			// Make a list of files that don't exist for return to the caller
			$status->fatal( 'filenotfound', $srcPath );
			continue;
		}
		if ( !( $flags & self::OVERWRITE ) && file_exists( $dstPath ) ) {
			if ( $flags & self::OVERWRITE_SAME ) {
				$hashSource = sha1_file( $srcPath );
				$hashDest = sha1_file( $dstPath );
				if ( $hashSource != $hashDest ) {
					$status->fatal( 'fileexistserror', $dstPath );
				}
			} else {
				$status->fatal( 'fileexistserror', $dstPath );
			}
		}
	}

	// On Windows an existing destination is removed before rename() when overwriting
	$deleteDest = wfIsWindows() && ( $flags & self::OVERWRITE );

	// Abort now on failure
	if ( !$status->ok ) {
		return $status;
	}

	// Pass 2: perform the copies / moves
	foreach ( $triplets as $triplet ) {
		list( $srcPath, $dstZone, $dstRel ) = $triplet;
		$root = $this->getZonePath( $dstZone );
		$dstPath = "$root/$dstRel";
		$good = true;

		if ( $flags & self::DELETE_SOURCE ) {
			// Only unlink a destination that is really there; unlink() on a
			// missing path raises an E_WARNING, and OVERWRITE does not imply
			// that the destination already exists.
			if ( $deleteDest && file_exists( $dstPath ) ) {
				unlink( $dstPath );
			}
			if ( !rename( $srcPath, $dstPath ) ) {
				$status->error( 'filerenameerror', $srcPath, $dstPath );
				$good = false;
			}
		} else {
			if ( !copy( $srcPath, $dstPath ) ) {
				$status->error( 'filecopyerror', $srcPath, $dstPath );
				$good = false;
			}
		}
		if ( $good ) {
			chmod( $dstPath, 0644 );
			$status->successCount++;
		} else {
			$status->failCount++;
		}
	}
	return $status;
}
214
/**
 * Check a list of files for existence.
 *
 * @param array $files Paths or virtual URLs to check; virtual URLs are
 *   resolved to local paths first
 * @param integer $flags Bitwise combination of the following flags:
 *     self::FILES_ONLY     Count an entry as existing only if it is a regular
 *                          file (is_file) rather than anything that exists
 * @return mixed Array of booleans keyed like $files, or false when the
 *   repository directory does not exist or is not readable
 */
function fileExistsBatch( $files, $flags = 0 ) {
	$repoRoot = $this->directory;
	if ( !( file_exists( $repoRoot ) && is_readable( $repoRoot ) ) ) {
		return false;
	}

	$filesOnly = $flags & self::FILES_ONLY;
	$existence = array();
	foreach ( $files as $key => $file ) {
		$path = self::isVirtualUrl( $file )
			? $this->resolveVirtualUrl( $file )
			: $file;
		$existence[$key] = $filesOnly ? is_file( $path ) : file_exists( $path );
	}

	return $existence;
}
241
/**
 * Make a newly created directory of the deleted zone harder to reach from
 * the web, for installations where the archive is not stored offline.
 *
 * @param string $dir The directory that was just created
 */
protected function initDeletedDir( $dir ) {
	// The deleted zone root gets a deny-all .htaccess, written only once
	$htaccess = $this->getZonePath( 'deleted' ) . '/.htaccess';
	if ( !file_exists( $htaccess ) ) {
		file_put_contents( $htaccess, "Deny from all\n" );
	}
	// An empty index.html in the new directory discourages crawling
	file_put_contents( $dir . '/index.html', '' );
}
255
/**
 * Store a file in the temp zone under a timestamped name.
 *
 * The destination is <hash path><UTC timestamp>!<original name>, so the
 * original file extension is kept.
 *
 * @param string $originalName The base name of the file as specified by the user
 * @param string $srcPath The current location of the file
 * @return FileRepoStatus The result of store(), with its value replaced by
 *   the virtual URL of the stored file
 */
function storeTemp( $originalName, $srcPath ) {
	$timestamp = gmdate( "YmdHis" );
	$prefix = $this->getHashPath( $originalName ) . $timestamp . '!';

	$status = $this->store( $srcPath, 'temp', $prefix . $originalName );
	// The URL form carries the name URL-encoded; the on-disk form does not
	$status->value = $this->getVirtualUrl( 'temp' ) . '/' . $prefix . rawurlencode( $originalName );
	return $status;
}
273
/**
 * Delete a temporary file.
 *
 * @param string $virtualUrl The virtual URL returned by storeTemp
 * @return boolean True if the file was unlinked; false if the URL does not
 *   start with this repository's temp zone prefix or the unlink failed
 */
function freeTemp( $virtualUrl ) {
	$expectedPrefix = "mwrepo://{$this->name}/temp";
	$actualPrefix = substr( $virtualUrl, 0, strlen( $expectedPrefix ) );
	if ( $actualPrefix != $expectedPrefix ) {
		wfDebug( __METHOD__.": Invalid virtual URL\n" );
		return false;
	}

	$localPath = $this->resolveVirtualUrl( $virtualUrl );
	// A failed unlink is reported through the return value, not a PHP warning
	wfSuppressWarnings();
	$removed = unlink( $localPath );
	wfRestoreWarnings();
	return $removed;
}
291
/**
 * Publish a batch of files into the public zone.
 *
 * A first pass validates names, resolves virtual source URLs, creates the
 * needed directories and records missing sources; nothing is written if
 * that pass records a fatal error. A second pass archives any existing
 * destination file and then moves or copies the source into place.
 *
 * @param array $triplets (source,dest,archive) triplets as per publish()
 * @param integer $flags Bitfield, may be FileRepo::DELETE_SOURCE to indicate
 *   that the source files should be deleted if possible
 * @return mixed Status object whose value maps each triplet index to
 *   'archived' or 'new'
 * @throws MWException If a destination or archive name fails validateFilename()
 */
function publishBatch( $triplets, $flags = 0 ) {
	// Preconditions on the repository root
	if ( !wfMkdirParents( $this->directory ) ) {
		return $this->newFatal( 'upload_directory_missing', $this->directory );
	}
	if ( !is_writable( $this->directory ) ) {
		return $this->newFatal( 'upload_directory_read_only', $this->directory );
	}

	$status = $this->newGood( array() );

	// Pass 1: checks and directory creation
	foreach ( $triplets as $index => $entry ) {
		list( $source, $destRel, $oldRel ) = $entry;

		if ( substr( $source, 0, 9 ) == 'mwrepo://' ) {
			// Keep the resolved local path for the second pass
			$source = $this->resolveVirtualUrl( $source );
			$triplets[$index][0] = $source;
		}
		if ( !$this->validateFilename( $destRel ) ) {
			throw new MWException( 'Validation error in $dstRel' );
		}
		if ( !$this->validateFilename( $oldRel ) ) {
			throw new MWException( 'Validation error in $archiveRel' );
		}

		$destPath = $this->directory . '/' . $destRel;
		$oldPath = $this->directory . '/' . $oldRel;

		// Abort immediately on directory creation errors since they're likely to be repetitive
		foreach ( array( dirname( $destPath ), dirname( $oldPath ) ) as $neededDir ) {
			if ( !is_dir( $neededDir ) && !wfMkdirParents( $neededDir ) ) {
				return $this->newFatal( 'directorycreateerror', $neededDir );
			}
		}

		if ( !is_file( $source ) ) {
			// Collect every missing source so the caller gets the full list
			$status->fatal( 'filenotfound', $source );
		}
	}

	if ( !$status->ok ) {
		return $status;
	}

	// Pass 2: archive old versions and put the new files in place
	foreach ( $triplets as $index => $entry ) {
		list( $source, $destRel, $oldRel ) = $entry;
		$destPath = $this->directory . '/' . $destRel;
		$oldPath = $this->directory . '/' . $oldRel;

		if ( !is_file( $destPath ) ) {
			$status->value[$index] = 'new';
		} else {
			// Refuse to archive over an existing archive file. On UNIX,
			// rename() would silently replace it and lose data; on Windows
			// rename() fails by itself when the target exists. Races are
			// expected to be prevented by DB-based synchronisation in the caller.
			if ( is_file( $oldPath ) ) {
				$archived = false;
			} else {
				wfSuppressWarnings();
				$archived = rename( $destPath, $oldPath );
				wfRestoreWarnings();
			}

			if ( !$archived ) {
				$status->error( 'filerenameerror',$destPath, $oldPath );
				$status->failCount++;
				continue;
			}
			wfDebug(__METHOD__.": moved file $destPath to $oldPath\n");
			$status->value[$index] = 'archived';
		}

		wfSuppressWarnings();
		if ( $flags & self::DELETE_SOURCE ) {
			$written = rename( $source, $destPath );
			$failureKey = 'filerenameerror';
		} else {
			$written = copy( $source, $destPath );
			$failureKey = 'filecopyerror';
		}
		if ( !$written ) {
			$status->error( $failureKey, $source, $destPath );
		}
		wfRestoreWarnings();

		if ( !$written ) {
			$status->failCount++;
			continue;
		}
		$status->successCount++;
		wfDebug(__METHOD__.": wrote tempfile $source to $destPath\n");
		// Thread-safe override for umask
		chmod( $destPath, 0644 );
	}
	return $status;
}
399
/**
 * Move a group of files from the public zone to the deletion archive.
 *
 * This implementation throws when no deletion archive directory is
 * configured; it never deletes files outright in that case.
 *
 * @param array $sourceDestPairs Array of source/destination pairs. Each element
 *   is a two-element array: the source path relative to the public root,
 *   then the archive path relative to the deleted zone root.
 * @return FileRepoStatus
 * @throws MWException If no deletion archive is configured or a path
 *   fails validateFilename()
 */
function deleteBatch( $sourceDestPairs ) {
	$status = $this->newGood();
	if ( !$this->deletedDir ) {
		throw new MWException( __METHOD__.': no valid deletion archive directory' );
	}

	// Phase 1: validate the names and make sure each archive directory
	// exists and is writable
	foreach ( $sourceDestPairs as $pair ) {
		list( $fromRel, $toRel ) = $pair;
		if ( !$this->validateFilename( $fromRel ) ) {
			throw new MWException( __METHOD__.':Validation error in $srcRel' );
		}
		if ( !$this->validateFilename( $toRel ) ) {
			throw new MWException( __METHOD__.':Validation error in $archiveRel' );
		}

		$targetDir = dirname( $this->deletedDir . '/' . $toRel );
		if ( !is_dir( $targetDir ) ) {
			if ( !wfMkdirParents( $targetDir ) ) {
				$status->fatal( 'directorycreateerror', $targetDir );
				continue;
			}
			$this->initDeletedDir( $targetDir );
		}
		// Check if the archive directory is writable
		// This doesn't appear to work on NTFS
		if ( !is_writable( $targetDir ) ) {
			$status->fatal( 'filedelete-archive-read-only', $targetDir );
		}
	}

	if ( !$status->ok ) {
		// Abort early, before any file has been touched
		return $status;
	}

	// Phase 2: move the files. From here on the result stays OK (failures
	// are recorded as non-fatal errors), which will lead to the files
	// being moved in the DB also.
	foreach ( $sourceDestPairs as $pair ) {
		list( $fromRel, $toRel ) = $pair;
		$from = $this->directory . '/' . $fromRel;
		$to = $this->deletedDir . '/' . $toRel;

		if ( file_exists( $to ) ) {
			# A file with this content hash is already archived; just drop the source
			$moved = @unlink( $from );
			if ( !$moved ) {
				$status->error( 'filedeleteerror', $from );
			}
		} else {
			$moved = @rename( $from, $to );
			if ( $moved ) {
				@chmod( $to, 0644 );
			} else {
				$status->error( 'filerenameerror', $from, $to );
			}
		}

		if ( $moved ) {
			$status->successCount++;
		} else {
			$status->failCount++;
		}
	}
	return $status;
}
480
/**
 * Build the relative directory path for a deletion archive key: one
 * directory level per leading character of the key, deletedHashLevels deep,
 * e.g. s/z/a/ for sza251lrxrc1jad41h5mgilp8nysje52.jpg with three levels.
 *
 * @param string $key Deletion archive key
 * @return string Relative path with a trailing slash, or '' for zero levels
 */
function getDeletedHashPath( $key ) {
	$levels = $this->deletedHashLevels;
	$segments = '';
	$depth = 0;
	while ( $depth < $levels ) {
		$segments .= $key[$depth] . '/';
		$depth++;
	}
	return $segments;
}
492
/**
 * Call a callback function for every entry found in the repository's hash
 * directories. Uses the filesystem even in child classes.
 *
 * Every possible hash directory (16^hashLevels of them) is probed and the
 * ones that do not exist are skipped. The callback receives the full path
 * of each directory entry exactly as readdir() reports it, which includes
 * the '.' and '..' entries.
 *
 * @param callback $callback Invoked with one argument, the entry's path
 */
function enumFilesInFS( $callback ) {
	$numDirs = 1 << ( $this->hashLevels * 4 );
	for ( $flatIndex = 0; $flatIndex < $numDirs; $flatIndex++ ) {
		// Zero-padded hex of the index, one digit per hash level
		$hexString = sprintf( "%0{$this->hashLevels}x", $flatIndex );
		$path = $this->directory;
		// Each level is named by a growing prefix of the hex string, e.g. /a/ab
		for ( $hexPos = 0; $hexPos < $this->hashLevels; $hexPos++ ) {
			$path .= '/' . substr( $hexString, 0, $hexPos + 1 );
		}
		if ( !is_dir( $path ) ) {
			continue;
		}
		$dir = opendir( $path );
		if ( $dir === false ) {
			// Unreadable directory: skip it instead of passing false to readdir()
			continue;
		}
		while ( false !== ( $name = readdir( $dir ) ) ) {
			call_user_func( $callback, $path . '/' . $name );
		}
		// Release the handle; up to $numDirs directories are opened in this loop
		closedir( $dir );
	}
}
514
/**
 * Call a callback function for every file in the repository.
 *
 * This class always walks the filesystem by delegating to enumFilesInFS();
 * the separate entry point lets another implementation use a database.
 *
 * @param callback $callback Passed through unchanged to enumFilesInFS()
 */
function enumFiles( $callback ) {
	$this->enumFilesInFS( $callback );
}
522
/**
 * Get the properties of the file behind a virtual URL.
 *
 * @param string $virtualUrl Virtual URL; must refer to this repo, otherwise
 *   resolveVirtualUrl() throws
 * @return mixed Whatever File::getPropsFromPath() returns for the local path
 */
function getFileProps( $virtualUrl ) {
	return File::getPropsFromPath( $this->resolveVirtualUrl( $virtualUrl ) );
}
531
/**
 * Path disclosure protection functions
 *
 * Pick the callback used for cleaning error message parameters.
 *
 * @return callback simpleClean() on this object when pathDisclosureProtection
 *   is 'simple', otherwise whatever the parent class provides
 */
function getErrorCleanupFunction() {
	// Loose (==) comparison on purpose: it matches switch semantics
	if ( $this->pathDisclosureProtection == 'simple' ) {
		return array( $this, 'simpleClean' );
	}
	return parent::getErrorCleanupFunction();
}
547
/**
 * Replace local filesystem paths in an error message parameter with
 * symbolic names, so that real paths are not disclosed.
 *
 * The replacement map is built on first use and cached on the object.
 * strtr() with an array tries the longest keys first, so the more specific
 * paths (e.g. the temp directory) win over their parent directories.
 *
 * @param string $param Error message parameter to clean
 * @return string The parameter with known paths replaced
 */
function simpleClean( $param ) {
	if ( !isset( $this->simpleCleanPairs ) ) {
		global $IP;
		$this->simpleCleanPairs = array(
			$this->directory => 'public',
			"{$this->directory}/temp" => 'temp',
			$IP => '$IP',
			// This file lives in includes/filerepo; the label must say so
			// instead of pointing at an unrelated extension directory
			dirname( __FILE__ ) => '$IP/includes/filerepo',
		);
		if ( $this->deletedDir ) {
			$this->simpleCleanPairs[$this->deletedDir] = 'deleted';
		}
	}
	return strtr( $param, $this->simpleCleanPairs );
}
563
564 }