Merge "[FileBackend] Changed copy script to use batches for concurrency."
[lhc/web/wiklou.git] / maintenance / syncFileBackend.php
1 <?php
2 /**
3 * Sync one file backend to another based on the journal of the latter.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @ingroup Maintenance
21 */
22
23 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
24
/**
 * Maintenance script that syncs a destination file backend to a source
 * file backend by walking the change journal of the source backend.
 */
class SyncFileBackend extends Maintenance {
	/**
	 * Register the command line options and set the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = "Sync one file backend with another using the journal";
		$this->addOption( 'src', 'Name of backend to sync from', true, true );
		$this->addOption( 'dst', 'Name of destination backend to sync', true, true );
		$this->addOption( 'start', 'Starting journal ID', false, true );
		$this->addOption( 'end', 'Ending journal ID', false, true );
		$this->addOption( 'posdir', 'Directory to read/record journal positions', false, true );
		$this->addOption( 'verbose', 'Verbose mode', false, false, 'v' );
		$this->setBatchSize( 50 );
	}

	/**
	 * Work out the journal range to process, run the sync and report the result.
	 *
	 * When no 'start' option is given but 'posdir' is, the starting position is
	 * read from (and, after a successful advance, written back to) a per-wiki
	 * position file inside that directory.
	 */
	public function execute() {
		$src = FileBackendGroup::singleton()->get( $this->getOption( 'src' ) );
		$dst = FileBackendGroup::singleton()->get( $this->getOption( 'dst' ) );

		$posDir = $this->getOption( 'posdir' );
		$posFile = $posDir
			? $posDir . '/' . wfWikiID()
			: false;

		$start = $this->getOption( 'start', 0 );
		if ( !$start && $posFile ) {
			$start = is_file( $posFile )
				? (int)trim( file_get_contents( $posFile ) )
				: 0;
			++$start; // we already did this ID, start with the next one
			$startFromPosFile = true;
		} else {
			$startFromPosFile = false;
		}
		$end = $this->getOption( 'end', INF );

		$this->output( "Synchronizing backend '{$dst->getName()}' to '{$src->getName()}'...\n" );
		$this->output( "Starting journal position is $start.\n" );
		if ( is_finite( $end ) ) {
			$this->output( "Ending journal position is $end.\n" );
		}

		// Actually sync the dest backend with the reference backend
		$lastOKPos = $this->syncBackends( $src, $dst, $start, $end );

		// Update the sync position file, but only if we successfully advanced
		if ( $startFromPosFile && $lastOKPos !== false && $lastOKPos >= $start ) {
			// file_put_contents() returns false on failure; do not claim success then
			if ( file_put_contents( $posFile, $lastOKPos, LOCK_EX ) !== false ) {
				$this->output( "Updated journal position file.\n" );
			} else {
				$this->error( "Could not update journal position file '$posFile'." );
			}
		}

		if ( $lastOKPos === false ) {
			if ( !$start ) {
				$this->output( "No journal entries found.\n" );
			} else {
				$this->output( "No new journal entries found.\n" );
			}
		} else {
			$this->output( "Stopped synchronization at journal position $lastOKPos.\n" );
		}

		if ( $this->isQuiet() ) {
			print $lastOKPos; // give a single machine-readable number
		}
	}

	/**
	 * Sync $dst backend to $src backend based on the $src logs given after $start.
	 * Returns the journal entry ID this advanced to and handled (inclusive).
	 *
	 * Entries are fetched in batches of at most the configured batch size. The
	 * loop stops at the first batch that fails to sync, so the returned position
	 * never skips over an unsynced entry.
	 *
	 * @param $src FileBackend
	 * @param $dst FileBackend
	 * @param $start integer Starting journal position
	 * @param $end integer Ending journal position
	 * @return integer|false Journal entry ID (0 if the first batch failed),
	 *   or false if there are no entries
	 */
	protected function syncBackends( FileBackend $src, FileBackend $dst, $start, $end ) {
		$lastOKPos = 0; // failed
		$first = true; // first batch

		if ( $start > $end ) { // sanity
			$this->error( "Error: given starting ID greater than ending ID.", 1 );
		}

		do {
			// Don't go past the ending ID.
			// NOTE(review): $limit caps the number of entries, not the highest ID;
			// if journal IDs can have gaps this may read beyond $end -- confirm
			// against the journal's getChangeEntries() implementation.
			$limit = min( $this->mBatchSize, $end - $start + 1 );
			$this->output( "Doing id $start to " . ( $start + $limit - 1 ) . "...\n" );

			$next = null; // filled in by getChangeEntries()
			$entries = $src->getJournal()->getChangeEntries( $start, $limit, $next );
			$start = $next; // start where we left off next time
			if ( $first && !count( $entries ) ) {
				return false; // nothing to do
			}
			$first = false;

			$lastPosInBatch = 0;
			$pathsInBatch = array(); // changed paths
			foreach ( $entries as $entry ) {
				if ( $entry['op'] !== 'null' ) { // null ops are just for reference
					$pathsInBatch[$entry['path']] = 1; // remove duplicates
				}
				$lastPosInBatch = $entry['id'];
			}

			$status = $this->syncFileBatch( array_keys( $pathsInBatch ), $src, $dst );
			if ( $status->isOK() ) {
				$lastOKPos = max( $lastOKPos, $lastPosInBatch );
			} else {
				$this->output( print_r( $status->getErrorsArray(), true ) );
				break; // no gaps; everything up to $lastOKPos must be OK
			}

			if ( !$start ) {
				$this->output( "End of journal entries.\n" );
			}
		} while ( $start && $start <= $end );

		return $lastOKPos;
	}

	/**
	 * Sync particular files of backend $src to the corresponding $dst backend files
	 *
	 * Files that exist in $src and differ from $dst are stored into $dst; files
	 * that do not exist in $src are deleted from $dst. All operations are sent
	 * to $dst in a single doOperations() call.
	 *
	 * @param $paths Array Storage paths taken from the journal
	 * @param $src FileBackend
	 * @param $dst FileBackend
	 * @return Status
	 */
	protected function syncFileBatch( array $paths, FileBackend $src, FileBackend $dst ) {
		$status = Status::newGood();
		if ( !count( $paths ) ) {
			return $status; // nothing to do
		}

		// Source: convert internal backend names (FileBackendMultiWrite) to the public one
		$sPaths = $this->replaceNamePaths( $paths, $src );
		// Destination: get corresponding path name
		$dPaths = $this->replaceNamePaths( $paths, $dst );

		// Lock the live backend paths from modification.
		// The lock objects are not read again; they are only held in these
		// locals for the remainder of this method.
		$sLock = $src->getScopedFileLocks( $sPaths, LockManager::LOCK_UW, $status );
		$eLock = $dst->getScopedFileLocks( $dPaths, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			return $status;
		}

		$ops = array();
		$fsFiles = array();
		foreach ( $sPaths as $i => $sPath ) {
			$dPath = $dPaths[$i]; // destination
			$sExists = $src->fileExists( array( 'src' => $sPath, 'latest' => 1 ) );
			if ( $sExists === true ) { // exists in source
				if ( $this->filesAreSame( $src, $dst, $sPath, $dPath ) ) {
					continue; // avoid local copies for non-FS backends
				}
				// Note: getLocalReference() is fast for FS backends
				$fsFile = $src->getLocalReference( array( 'src' => $sPath, 'latest' => 1 ) );
				if ( !$fsFile ) {
					$this->error( "Unable to sync '$dPath': could not get local copy." );
					$status->fatal( 'backend-fail-internal', $src->getName() );
					return $status;
				}
				$fsFiles[] = $fsFile; // keep TempFSFile objects alive as needed
				// Note: prepare() is usually fast for key/value backends
				$status->merge( $dst->prepare( array( 'dir' => dirname( $dPath ) ) ) );
				if ( !$status->isOK() ) {
					return $status;
				}
				$ops[] = array( 'op' => 'store',
					'src' => $fsFile->getPath(), 'dst' => $dPath, 'overwrite' => 1 );
			} elseif ( $sExists === false ) { // does not exist in source
				$ops[] = array( 'op' => 'delete', 'src' => $dPath, 'ignoreMissingSource' => 1 );
			} else { // error
				$this->error( "Unable to sync '$dPath': could not stat file." );
				$status->fatal( 'backend-fail-internal', $src->getName() );
				return $status;
			}
		}

		$status->merge( $dst->doOperations( $ops,
			array( 'nonLocking' => 1, 'nonJournaled' => 1 ) ) );
		if ( $status->isOK() && $this->getOption( 'verbose' ) ) {
			$this->output( "Synchronized these file(s):\n" . implode( "\n", $dPaths ) . "\n" );
		}

		return $status;
	}

	/**
	 * Substitute the backend name of storage paths with that of a given one
	 *
	 * @param $paths Array|string List of paths or single string path
	 * @param $backend FileBackend Backend whose name replaces the one in the path(s)
	 * @return Array|string
	 */
	protected function replaceNamePaths( $paths, FileBackend $backend ) {
		return preg_replace(
			'!^mwstore://([^/]+)!',
			StringUtils::escapeRegexReplacement( "mwstore://" . $backend->getName() ),
			$paths // string or array
		);
	}

	/**
	 * Check whether a source file and a destination file have the same
	 * size and the same SHA-1 (base 36) hash.
	 *
	 * A lookup that yields false or null is never treated as a match, so that
	 * two failed lookups cannot make differing files look identical and get
	 * skipped. (Assumes these backend getters signal failure with false/null
	 * -- TODO confirm against FileBackend.)
	 *
	 * @param $src FileBackend
	 * @param $dst FileBackend
	 * @param $sPath string Storage path in $src
	 * @param $dPath string Storage path in $dst
	 * @return bool
	 */
	protected function filesAreSame( FileBackend $src, FileBackend $dst, $sPath, $dPath ) {
		$sSize = $src->getFileSize( array( 'src' => $sPath ) );
		if ( $sSize === false || $sSize === null ) {
			return false; // unknown source size; cannot claim equality
		}
		if ( $sSize !== $dst->getFileSize( array( 'src' => $dPath ) ) ) {
			return false; // short-circuit: no need to hash when sizes differ
		}

		$sSha1 = $src->getFileSha1Base36( array( 'src' => $sPath ) );
		if ( $sSha1 === false || $sSha1 === null ) {
			return false; // unknown source hash; cannot claim equality
		}

		return ( $sSha1 === $dst->getFileSha1Base36( array( 'src' => $dPath ) ) );
	}
}
234
// Name of the maintenance class defined in this file; the file named by
// RUN_MAINTENANCE_IF_MAIN is included once after it has been set.
$maintClass = 'SyncFileBackend';
require_once RUN_MAINTENANCE_IF_MAIN;