5 * Created on Sep 27, 2008
7 * Copyright © 2008 Roan Kattouw "<Firstname>.<Lastname>@gmail.com"
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
28 * A query module to list duplicates of the given file(s)
32 class ApiQueryDuplicateFiles
extends ApiQueryGeneratorBase
{
34 public function __construct( $query, $moduleName ) {
35 parent
::__construct( $query, $moduleName, 'df' );
38 public function execute() {
42 public function getCacheMode( $params ) {
46 public function executeGenerator( $resultPageSet ) {
47 $this->run( $resultPageSet );
51 * @param $resultPageSet ApiPageSet
54 private function run( $resultPageSet = null ) {
55 $params = $this->extractRequestParams();
56 $namespaces = $this->getPageSet()->getAllTitlesByNamespace();
57 if ( empty( $namespaces[NS_FILE
] ) ) {
60 $images = $namespaces[NS_FILE
];
62 if( $params['dir'] == 'descending' ) {
63 $images = array_reverse( $images );
66 $skipUntilThisDup = false;
67 if ( isset( $params['continue'] ) ) {
68 $cont = explode( '|', $params['continue'] );
69 $this->dieContinueUsageIf( count( $cont ) != 2 );
70 $fromImage = $cont[0];
71 $skipUntilThisDup = $cont[1];
72 // Filter out any images before $fromImage
73 foreach ( $images as $image => $pageId ) {
74 if ( $image < $fromImage ) {
75 unset( $images[$image] );
82 $filesToFind = array_keys( $images );
83 if( $params['localonly'] ) {
84 $files = RepoGroup
::singleton()->getLocalRepo()->findFiles( $filesToFind );
86 $files = RepoGroup
::singleton()->findFiles( $filesToFind );
94 foreach ( $files as $file ) {
95 $sha1s[$file->getName()] = $file->getSha1();
98 // Find all files that share these hashes. Result format: array( hash => array( dup1, dup2 ), hash1 => ... )
99 $filesToFindBySha1s = array_unique( array_values( $sha1s ) );
100 if( $params['localonly'] ) {
101 $filesBySha1s = RepoGroup
::singleton()->getLocalRepo()->findBySha1s( $filesToFindBySha1s );
103 $filesBySha1s = RepoGroup
::singleton()->findBySha1s( $filesToFindBySha1s );
106 // Iterate over $images (rather than $files) so that the continue param is handled correctly
107 foreach( $images as $image => $pageId ) {
108 if( !isset( $sha1s[$image] ) ) {
109 continue; //file does not exist
111 $sha1 = $sha1s[$image];
112 $dupFiles = $filesBySha1s[$sha1];
113 if( $params['dir'] == 'descending' ) {
114 $dupFiles = array_reverse( $dupFiles );
116 foreach ( $dupFiles as $dupFile ) {
117 $dupName = $dupFile->getName();
118 if( $image == $dupName && $dupFile->isLocal() ) {
119 continue; //ignore the local file itself
121 if( $skipUntilThisDup !== false && $dupName < $skipUntilThisDup ) {
122 continue; //skip to pos after the image from continue param
124 $skipUntilThisDup = false;
125 if ( ++
$count > $params['limit'] ) {
126 $fit = false; //break outer loop
127 // We're one over limit which shows that
128 // there are additional images to be had. Stop here...
129 $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
132 if ( !is_null( $resultPageSet ) ) {
133 $titles[] = $dupFile->getTitle();
137 'user' => $dupFile->getUser( 'text' ),
138 'timestamp' => wfTimestamp( TS_ISO_8601
, $dupFile->getTimestamp() )
140 if( !$dupFile->isLocal() ) {
143 $fit = $this->addPageSubItem( $pageId, $r );
145 $this->setContinueEnumParameter( 'continue', $image . '|' . $dupName );
154 if ( !is_null( $resultPageSet ) ) {
155 $resultPageSet->populateFromTitles( $titles );
159 public function getAllowedParams() {
162 ApiBase
::PARAM_DFLT
=> 10,
163 ApiBase
::PARAM_TYPE
=> 'limit',
164 ApiBase
::PARAM_MIN
=> 1,
165 ApiBase
::PARAM_MAX
=> ApiBase
::LIMIT_BIG1
,
166 ApiBase
::PARAM_MAX2
=> ApiBase
::LIMIT_BIG2
170 ApiBase
::PARAM_DFLT
=> 'ascending',
171 ApiBase
::PARAM_TYPE
=> array(
176 'localonly' => false,
180 public function getParamDescription() {
182 'limit' => 'How many duplicate files to return',
183 'continue' => 'When more results are available, use this to continue',
184 'dir' => 'The direction in which to list',
185 'localonly' => 'Look only for files in the local repository',
189 public function getResultProperties() {
194 'timestamp' => 'timestamp',
195 'shared' => 'boolean',
200 public function getDescription() {
201 return 'List all files that are duplicates of the given file(s) based on hash values';
204 public function getExamples() {
206 'api.php?action=query&titles=File:Albert_Einstein_Head.jpg&prop=duplicatefiles',
207 'api.php?action=query&generator=allimages&prop=duplicatefiles',
211 public function getHelpUrls() {
212 return 'https://www.mediawiki.org/wiki/API:Properties#duplicatefiles_.2F_df';