build: Upgrade mediawiki-codesniffer from 26.0.0 to 28.0.0
[lhc/web/wiklou.git] / maintenance / syncFileBackend.php
1 <?php
2 /**
3 * Sync one file backend to another based on the journal of the latter.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24 require_once __DIR__ . '/Maintenance.php';
25
26 /**
27 * Maintenance script that syncs one file backend to another based on
28 * the journal of the latter.
29 *
30 * @ingroup Maintenance
31 */
32 class SyncFileBackend extends Maintenance {
/**
 * Set up the script description, its command-line options and the batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Sync one file backend with another using the journal' );

	// One row per option, holding the addOption() arguments in registration order
	$optionSpecs = [
		[ 'src', 'Name of backend to sync from', true, true ],
		[ 'dst', 'Name of destination backend to sync', false, true ],
		[ 'start', 'Starting journal ID', false, true ],
		[ 'end', 'Ending journal ID', false, true ],
		[ 'posdir', 'Directory to read/record journal positions', false, true ],
		[ 'posdump', 'Just dump current journal position into the position dir.' ],
		[ 'postime', 'For position dumps, get the ID at this time', false, true ],
		[ 'backoff', 'Stop at entries younger than this age (sec).', false, true ],
		[ 'verbose', 'Verbose mode', false, false, 'v' ],
	];
	foreach ( $optionSpecs as $spec ) {
		$this->addOption( ...$spec );
	}

	// Upper bound on the number of journal entries fetched per batch
	$this->setBatchSize( 50 );
}
47
/**
 * Run the script.
 *
 * With --posdump, only a journal position of the source backend (the current
 * one, or the one at --postime) is written to the position file. Otherwise
 * the destination backend is synced to the source backend over the requested
 * journal range, and the position file is advanced if syncing started from it.
 */
public function execute() {
	$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );

	// The position file is named after the source backend's domain ID
	$posDir = $this->getOption( 'posdir' );
	$posFile = ( $posDir != '' )
		? "$posDir/" . rawurlencode( $src->getDomainId() )
		: false;

	if ( $this->hasOption( 'posdump' ) ) {
		// Dump-only mode: record a journal position and leave without syncing
		if ( !$this->hasOption( 'posdir' ) ) {
			$this->fatalError( "Param posdir required!" );
		}
		if ( !$this->hasOption( 'postime' ) ) {
			$id = (int)$src->getJournal()->getCurrentPosition();
			$this->output( "Current journal position is $id.\n" );
		} else {
			$id = (int)$src->getJournal()->getPositionAtTime( $this->getOption( 'postime' ) );
			$this->output( "Requested journal position is $id.\n" );
		}
		$written = file_put_contents( $posFile, $id, LOCK_EX );
		$this->output(
			$written !== false
				? "Saved journal position file.\n"
				: "Could not save journal position file.\n"
		);
		if ( $this->isQuiet() ) {
			print $id; // give a single machine-readable number
		}

		return;
	}

	if ( !$this->hasOption( 'dst' ) ) {
		$this->fatalError( "Param dst required!" );
	}
	$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

	// Without an explicit --start, resume from the ID stored in the position file
	$start = $this->getOption( 'start', 0 );
	$startFromPosFile = ( !$start && $posFile && is_dir( $posDir ) );
	if ( $startFromPosFile ) {
		$savedPos = is_file( $posFile )
			? (int)trim( file_get_contents( $posFile ) )
			: 0;
		$start = $savedPos + 1; // the saved ID was already handled, so begin with the next one
	}

	// --backoff takes precedence over --end when both are given
	if ( $this->hasOption( 'backoff' ) ) {
		$cutoff = time() - $this->getOption( 'backoff', 0 );
		$end = (int)$src->getJournal()->getPositionAtTime( $cutoff );
	} else {
		$end = $this->getOption( 'end', INF );
	}

	$this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
	$this->output( "Starting journal position is $start.\n" );
	if ( is_finite( $end ) ) {
		$this->output( "Ending journal position is $end.\n" );
	}

	// Invoked after each good batch so the position file tracks progress
	$updatePosFile = function ( $pos ) use ( $startFromPosFile, $posFile, $start ) {
		if ( $startFromPosFile && $pos >= $start ) { // successfully advanced
			file_put_contents( $posFile, $pos, LOCK_EX );
		}
	};

	// Bring the destination backend in line with the source backend
	$lastOKPos = $this->syncBackends( $src, $dst, $start, $end, $updatePosFile );

	// Final write of the position file, this time reporting the outcome
	if ( $startFromPosFile && $lastOKPos >= $start ) { // successfully advanced
		$updated = file_put_contents( $posFile, $lastOKPos, LOCK_EX );
		$this->output(
			$updated !== false
				? "Updated journal position file.\n"
				: "Could not update journal position file.\n"
		);
	}

	if ( $lastOKPos !== false ) {
		$this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
	} elseif ( $start ) {
		$this->output( "No new journal entries found.\n" );
	} else {
		$this->output( "No journal entries found.\n" );
	}

	if ( $this->isQuiet() ) {
		print $lastOKPos; // give a single machine-readable number
	}
}
144
/**
 * Sync $dst backend to $src backend based on the $src journal entries
 * from $start up to and including $end.
 * Returns the journal entry ID this advanced to and handled (inclusive).
 *
 * Entries are fetched in batches no larger than the configured batch size.
 * Processing stops at the first batch that fails, so every position up to
 * the returned one has been synced without gaps.
 *
 * @param FileBackend $src
 * @param FileBackend $dst
 * @param int $start Starting journal position
 * @param int|float $end Ending journal position (INF for no upper bound)
 * @param Closure $callback Callback to update any position file; it is
 *   called with the last successfully synced position after each good batch
 * @return int|bool Journal entry ID, 0 if no batch was synced successfully,
 *   or false if there are no entries at all
 */
protected function syncBackends(
	FileBackend $src, FileBackend $dst, $start, $end, Closure $callback
) {
	$lastOKPos = 0; // failed
	$first = true; // first batch

	if ( $start > $end ) { // sanity
		$this->fatalError( "Error: given starting ID greater than ending ID." );
	}

	$next = null;
	do {
		$limit = min( $this->getBatchSize(), $end - $start + 1 ); // don't go past ending ID
		$this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

		$entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
		$start = $next; // start where we left off next time
		if ( $first && !count( $entries ) ) {
			return false; // nothing to do
		}
		$first = false;

		$lastPosInBatch = 0;
		$pathsInBatch = []; // changed paths
		foreach ( $entries as $entry ) {
			// NOTE(review): assumes getChangeEntries() treats $limit as a number of
			// entries rather than an ID bound. When journal IDs are not contiguous
			// a batch can then hold IDs beyond $end; those must be neither synced
			// nor counted as handled.
			if ( $entry['id'] > $end ) {
				continue;
			}
			if ( $entry['op'] !== 'null' ) { // null ops are just for reference
				$pathsInBatch[$entry['path']] = 1; // remove duplicates
			}
			$lastPosInBatch = $entry['id'];
		}

		$status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
		if ( $status->isOK() ) {
			$lastOKPos = max( $lastOKPos, $lastPosInBatch );
			$callback( $lastOKPos ); // update position file
		} else {
			$this->error( print_r( $status->getErrorsArray(), true ) );
			break; // no gaps; everything up to $lastPos must be OK
		}

		if ( !$start ) {
			$this->output( "End of journal entries.\n" );
		}
	} while ( $start && $start <= $end );

	return $lastOKPos;
}
203
/**
 * Bring the given files in $dst in line with their counterparts in $src.
 *
 * Files present in $src are stored into $dst unless both copies already
 * match; files absent from $src are deleted from $dst.
 *
 * @param array $paths Storage paths taken from the journal entries
 * @param FileBackend $src
 * @param FileBackend $dst
 * @return Status
 */
protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
	$status = Status::newGood();
	if ( count( $paths ) === 0 ) {
		return $status; // nothing to do
	}

	// Rewrite the backend name in each path: the public name for the source
	// (journal paths may carry internal FileBackendMultiWrite names) ...
	$srcPaths = $this->replaceNamePaths( $paths, $src );
	// ... and the destination's name for the matching destination paths
	$dstPaths = $this->replaceNamePaths( $paths, $dst );

	// Lock the live backend paths from modification. The lock objects are kept
	// in locals for the rest of the method (presumably released on scope exit).
	$srcLock = $src->getScopedFileLocks( $srcPaths, LockManager::LOCK_UW, $status );
	$dstLock = $dst->getScopedFileLocks( $dstPaths, LockManager::LOCK_EX, $status );
	if ( !$status->isOK() ) {
		return $status;
	}

	$src->preloadFileStat( [ 'srcs' => $srcPaths, 'latest' => 1 ] );
	$dst->preloadFileStat( [ 'srcs' => $dstPaths, 'latest' => 1 ] );

	$ops = [];
	$localRefs = []; // keeps TempFSFile objects alive as needed
	foreach ( $srcPaths as $idx => $srcPath ) {
		$dstPath = $dstPaths[$idx];
		$existsInSrc = $src->fileExists( [ 'src' => $srcPath, 'latest' => 1 ] );

		if ( $existsInSrc === false ) {
			// Gone from the source, so remove it from the destination too
			$ops[] = [ 'op' => 'delete', 'src' => $dstPath, 'ignoreMissingSource' => 1 ];
			continue;
		}

		if ( $existsInSrc !== true ) {
			// Neither true nor false: existence could not be determined
			$this->error( "Unable to sync '$dstPath': could not stat file." );
			$status->fatal( 'backend-fail-internal', $src->getName() );

			return $status;
		}

		if ( $this->filesAreSame( $src, $dst, $srcPath, $dstPath ) ) {
			continue; // avoid local copies for non-FS backends
		}

		// Note: getLocalReference() is fast for FS backends
		$localRef = $src->getLocalReference( [ 'src' => $srcPath, 'latest' => 1 ] );
		if ( !$localRef ) {
			$this->error( "Unable to sync '$dstPath': could not get local copy." );
			$status->fatal( 'backend-fail-internal', $src->getName() );

			return $status;
		}
		$localRefs[] = $localRef;

		// Note: prepare() is usually fast for key/value backends
		$prepareStatus = $dst->prepare( [
			'dir' => dirname( $dstPath ), 'bypassReadOnly' => 1 ] );
		$status->merge( $prepareStatus );
		if ( !$status->isOK() ) {
			return $status;
		}

		$ops[] = [ 'op' => 'store',
			'src' => $localRef->getPath(), 'dst' => $dstPath, 'overwrite' => 1 ];
	}

	$startTime = microtime( true );
	$status = $dst->doQuickOperations( $ops, [ 'bypassReadOnly' => 1 ] );
	if ( !$status->isOK() ) {
		sleep( 10 ); // wait and retry copy again
		$status = $dst->doQuickOperations( $ops, [ 'bypassReadOnly' => 1 ] );
	}
	$elapsedMs = floor( ( microtime( true ) - $startTime ) * 1000 );
	if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
		$this->output( "Synchronized these file(s) [{$elapsedMs}ms]:\n" .
			implode( "\n", $dstPaths ) . "\n" );
	}

	return $status;
}
283
/**
 * Rewrite storage paths so that they name the given backend.
 *
 * The leading "mwstore://<name>" part of each path is swapped for
 * "mwstore://" followed by the name of $backend.
 *
 * @param array|string $paths List of paths or single string path
 * @param FileBackend $backend Backend whose name is substituted in
 * @return array|string Same shape as $paths
 */
protected function replaceNamePaths( $paths, FileBackend $backend ) {
	// Escape the name so it is taken literally in the replacement string
	$replacement = StringUtils::escapeRegexReplacement(
		"mwstore://" . $backend->getName()
	);

	// preg_replace() accepts a single string as well as an array of subjects
	return preg_replace( '!^mwstore://([^/]+)!', $replacement, $paths );
}
298
/**
 * Check whether a source file and its destination counterpart hold the same content.
 *
 * Sizes are compared first as a cheap short-circuit; the SHA-1 hashes are
 * only fetched when the sizes match. If the size or hash of the source file
 * cannot be obtained, the files are reported as different, so that a failed
 * lookup on both sides is never mistaken for a match.
 *
 * @param FileBackend $src
 * @param FileBackend $dst
 * @param string $sPath Storage path of the file in $src
 * @param string $dPath Storage path of the file in $dst
 * @return bool True only if both size and SHA-1 were obtained and match
 */
protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
	$srcSize = $src->getFileSize( [ 'src' => $sPath ] );
	if ( $srcSize === false || $srcSize === null ) {
		return false; // lookup failed; do not let it equal a failed lookup in $dst
	}
	if ( $srcSize !== $dst->getFileSize( [ 'src' => $dPath ] ) ) {
		return false; // short-circuit
	}

	$srcSha1 = $src->getFileSha1Base36( [ 'src' => $sPath ] );
	if ( $srcSha1 === false || $srcSha1 === null ) {
		return false; // lookup failed; same reasoning as for the size
	}

	return $srcSha1 === $dst->getFileSha1Base36( [ 'src' => $dPath ] );
}
308 }
309
310 $maintClass = SyncFileBackend::class;
311 require_once RUN_MAINTENANCE_IF_MAIN;