Merge "Make DBAccessBase use DBConnRef, rename $wiki, and hide getLoadBalancer()"
[lhc/web/wiklou.git] / maintenance / populateImageSha1.php
1 <?php
2 /**
3 * Optional upgrade script to populate the img_sha1 field
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24 use MediaWiki\MediaWikiServices;
25 use MediaWiki\Shell\Shell;
26
27 require_once __DIR__ . '/Maintenance.php';
28
29 /**
30 * Maintenance script to populate the img_sha1 field.
31 *
32 * @ingroup Maintenance
33 */
class PopulateImageSha1 extends LoggedUpdateMaintenance {
	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Populate the img_sha1 field' );
		$this->addOption( 'force', "Recalculate sha1 for rows that already have a value" );
		$this->addOption( 'multiversiononly', "Calculate only for files with several versions" );
		$this->addOption( 'method', "Use 'pipe' to pipe to mysql command line,\n" .
			"\t\tdefault uses Database class", false, true );
		$this->addOption(
			'file',
			'Fix for a specific file, without File: namespace prefixed',
			false,
			true
		);
	}

	/**
	 * @return string Key under which this update is recorded
	 */
	protected function getUpdateKey() {
		return 'populate img_sha1';
	}

	/**
	 * @return string Message shown when the update is skipped
	 */
	protected function updateSkippedMessage() {
		return 'img_sha1 column of image table already populated.';
	}

	/**
	 * Run the script. Partial runs (--file or --multiversiononly) call
	 * doDBUpdates() directly so that the update log is neither consulted
	 * nor written; every other run goes through the parent implementation.
	 */
	public function execute() {
		if ( $this->getOption( 'file' ) || $this->hasOption( 'multiversiononly' ) ) {
			$this->doDBUpdates(); // skip update log checks/saves
		} else {
			parent::execute();
		}
	}

	/**
	 * Compute the SHA-1 of the current and archived versions of the selected
	 * files and store it in image.img_sha1 / oldimage.oi_sha1.
	 *
	 * Which files are selected depends on the options:
	 *  - --file: only the named file (fatal error if it has no image row)
	 *  - --multiversiononly: every distinct name found in the oldimage table
	 *  - --force: every row of the image table
	 *  - otherwise: rows of the image table whose img_sha1 is empty
	 *
	 * @return bool False when only one specific file was processed, so that
	 *  the run is not treated as a completed update; true otherwise
	 */
	public function doDBUpdates() {
		$method = $this->getOption( 'method', 'normal' );
		// Keep the --file option value in its own variable: the loop below
		// works with File objects and must not clobber it, because the return
		// value depends on whether a specific file was requested.
		$targetName = $this->getOption( 'file', '' );
		$force = $this->getOption( 'force' );
		$isRegen = ( $force || $targetName != '' ); // forced recalculation?

		$t = -microtime( true );
		$dbw = $this->getDB( DB_MASTER );
		if ( $targetName != '' ) {
			$res = $dbw->select(
				'image',
				[ 'img_name' ],
				[ 'img_name' => $targetName ],
				__METHOD__
			);
			// select() hands back a result object even when nothing matched,
			// so the row count has to be checked to detect a missing file.
			if ( !$res || $res->numRows() === 0 ) {
				$this->fatalError( "No such file: $targetName" );
			}
			$this->output( "Populating img_sha1 field for specified files\n" );
		} else {
			if ( $this->hasOption( 'multiversiononly' ) ) {
				$conds = [];
				$this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
			} elseif ( $force ) {
				$conds = [];
				$this->output( "Populating and recalculating img_sha1 field\n" );
			} else {
				$conds = [ 'img_sha1' => '' ];
				$this->output( "Populating img_sha1 field\n" );
			}
			if ( $this->hasOption( 'multiversiononly' ) ) {
				$res = $dbw->select( 'oldimage',
					[ 'img_name' => 'DISTINCT(oi_name)' ], $conds, __METHOD__ );
			} else {
				$res = $dbw->select( 'image', [ 'img_name' ], $conds, __METHOD__ );
			}
		}

		$imageTable = $dbw->tableName( 'image' );
		$oldImageTable = $dbw->tableName( 'oldimage' );

		$pipe = null;
		if ( $method == 'pipe' ) {
			// Opening a pipe allows the SHA-1 operation to be done in parallel
			// with the database write operation, because the writes are queued
			// in the pipe buffer. This can improve performance by up to a
			// factor of 2.
			// NOTE(review): the password is passed as a command-line argument
			// and may therefore be visible to other local users.
			$config = $this->getConfig();
			$cmd = 'mysql -u' . Shell::escape( $config->get( 'DBuser' ) ) .
				' -h' . Shell::escape( $config->get( 'DBserver' ) ) .
				' -p' . Shell::escape( $config->get( 'DBpassword' ), $config->get( 'DBname' ) );
			$this->output( "Using pipe method\n" );
			$pipe = popen( $cmd, 'w' );
			if ( !$pipe ) {
				$this->fatalError( "Unable to open a pipe to the mysql command line client" );
			}
		}

		$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();

		$numRows = $res->numRows();
		$i = 0;
		foreach ( $res as $row ) {
			if ( $i % $this->getBatchSize() == 0 ) {
				$this->output( sprintf(
					"Done %d of %d, %5.3f%%  \r", $i, $numRows, $i / $numRows * 100 ) );
				wfWaitForSlaves();
			}

			$localFile = $localRepo->newFile( $row->img_name );
			if ( !$localFile ) {
				continue;
			}

			// Upgrade the current file version...
			$sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
			if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
				if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
					// The population was probably done already. If the old SHA1
					// does not match, then both fix the SHA1 and the metadata.
					$localFile->upgradeRow();
				} else {
					$sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
						" WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
					$this->runUpdateQuery( $dbw, $sql, $pipe );
				}
			}
			// Upgrade the old file versions...
			foreach ( $localFile->getHistory() as $oldFile ) {
				/** @var OldLocalFile $oldFile */
				'@phan-var OldLocalFile $oldFile';
				$sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
				if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
					if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
						// The population was probably done already. If the old SHA1
						// does not match, then both fix the SHA1 and the metadata.
						$oldFile->upgradeRow();
					} else {
						$sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
							" WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
							" oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
						$this->runUpdateQuery( $dbw, $sql, $pipe );
					}
				}
			}
			$i++;
		}
		if ( $pipe !== null ) {
			fflush( $pipe );
			pclose( $pipe );
		}
		$t += microtime( true );
		$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

		return $targetName == ''; // we only updated *some* files, don't log
	}

	/**
	 * Send one UPDATE statement either down the mysql pipe or through the
	 * database handle.
	 *
	 * @param object $dbw Database handle used when no pipe is open
	 * @param string $sql Complete SQL statement, without trailing semicolon
	 * @param resource|null $pipe Open write pipe, or null to use $dbw
	 */
	private function runUpdateQuery( $dbw, $sql, $pipe ) {
		if ( $pipe !== null ) {
			fwrite( $pipe, "$sql;\n" );
		} else {
			$dbw->query( $sql, __METHOD__ );
		}
	}
}
186
// Name the maintenance class defined in this file, then include the file
// referenced by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that include runs the class named in $maintClass
// when this script is the entry point; confirm against Maintenance.php.
187 $maintClass = PopulateImageSha1::class;
188 require_once RUN_MAINTENANCE_IF_MAIN;