Some bugzilla.wikimedia.org -> phabricator.wikimedia.org changes
[lhc/web/wiklou.git] / includes / site / MediaWikiSite.php
1 <?php
2 /**
3 * Class representing a MediaWiki site.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Site
22 * @license GNU GPL v2+
23 * @author John Erling Blad < jeblad@gmail.com >
24 * @author Daniel Kinzler
25 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
26 */
27
/**
 * Class representing a MediaWiki site.
 *
 * Extends Site with MediaWiki-specific behavior: named page and file paths,
 * conversion of page titles to DB-key form, and normalization of page names
 * by querying the remote site's web API.
 *
 * @since 1.21
 *
 * @ingroup Site
 */
class MediaWikiSite extends Site {
	const PATH_FILE = 'file_path';
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new instance and assigns it the given global ID.
	 *
	 * @since 1.21
	 * @deprecated since 1.21 Just use the constructor or the factory Site::newForType
	 *
	 * @param int $globalId Passed unchanged to setGlobalId().
	 *  NOTE(review): the declared type is taken from the original
	 *  documentation; confirm it against Site::setGlobalId().
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		$site = new static();
		$site->setGlobalId( $globalId );
		return $site;
	}

	/**
	 * Constructor.
	 *
	 * @since 1.21
	 *
	 * @param string $type Site type handed to the parent constructor.
	 */
	public function __construct( $type = self::TYPE_MEDIAWIKI ) {
		parent::__construct( $type );
	}

	/**
	 * Returns the database form of the given title, i.e. the title with
	 * every space replaced by an underscore.
	 *
	 * @since 1.21
	 *
	 * @param string $title The target page's title, in normalized form.
	 *
	 * @return string
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title, using the
	 * normalization rules of the given site. If the given title is a redirect,
	 * the redirect will be resolved and the redirect target is returned.
	 *
	 * @note This actually makes an API request to the remote site, so beware
	 *   that this function is slow and depends on an external service.
	 *
	 * @note If MW_PHPUNIT_TEST is defined, the call to the external site is
	 *   skipped, and the title is normalized using the local normalization
	 *   rules as implemented by the Title class.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.21
	 *
	 * @param string $pageName
	 *
	 * @return string|bool The normalized title, or false if the request
	 *  failed, the reply could not be decoded, or the reply marks the page
	 *  as missing or invalid or carries no title.
	 * @throws MWException If $pageName is not a string.
	 */
	public function normalizePageName( $pageName ) {
		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just
			// normalize locally.
			// Note: this may cause results to be inconsistent with the actual
			// normalization used by the respective remote site!
			$t = Title::newFromText( $pageName );

			// Title::newFromText() may give back no Title object for text that
			// cannot form a valid title; report that like any other failure
			// instead of calling a method on a non-object.
			if ( !$t ) {
				return false;
			}

			return $t->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to T42017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://phabricator.wikimedia.org/T42017
		$pageName = UtfNormal\Validator::cleanUp( $pageName );

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			// @todo options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		$url = wfAppendQuery( $this->getFileUrl( 'api.php' ), $args );

		// Go on call the external site
		// @todo we need a good way to specify a timeout here.
		$ret = Http::get( $url, array(), __METHOD__ );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		$page = static::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! "
				. $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! "
				. $ret );
			return false;
		}

		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get normalization record for a given page title from an API response.
	 *
	 * The title is followed through the 'normalized', 'converted' and
	 * 'redirects' lists (each mapping 'from' to 'to') and the resulting title
	 * is then looked up in the 'pages' list.
	 *
	 * @since 1.21
	 *
	 * @param array $externalData A reply from the API on a external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|bool A 'page' structure representing the page identified by $pageTitle,
	 *  or false if no unambiguous page record could be found.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $externalData['query']['pages'] ) ) {
			$pages = array_values( $externalData['query']['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// According to the original author's note, the remaining logic is
		// only used during internal testing.
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}

		// Loop over the different named structures, that otherwise are
		// similar. 'pages' has to stay last: it is the step that returns.
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exist at all.
			if ( !isset( $externalData['query'][$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() keeps the original keys, so re-index afterwards;
			// otherwise a lone hit that was not the first entry of the list
			// could not be read back as element 0.
			$collectedHits = array_values( array_filter(
				$externalData['query'][$listId],
				function ( $a ) use ( $fieldId, $pageTitle ) {
					return isset( $a[$fieldId] ) && $a[$fieldId] === $pageTitle;
				}
			) );
			$hitCount = count( $collectedHits );

			// More than one hit is ambiguous, whichever list we are in.
			if ( $hitCount > 1 ) {
				return false;
			}

			// If on the pages structure we should prepare for returning.
			if ( $fieldId === 'title' ) {
				return $hitCount === 1 ? $collectedHits[0] : false;
			}

			// If still looping over normalization, conversion or redirects,
			// then we need to keep the new page title for later rounds.
			if ( $hitCount === 1 ) {
				$pageTitle = $collectedHits[0]['to'];
			}
		}

		// Only reached when the reply has no 'pages' list.
		return false;
	}

	/**
	 * @see Site::getLinkPathType
	 * Returns MediaWikiSite::PATH_PAGE
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the relative page path, i.e. the path component of the
	 * path stored under PATH_PAGE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the relative file path, i.e. the path component of the
	 * path stored under PATH_FILE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path (stored under PATH_PAGE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path (stored under PATH_FILE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * @see Site::getPageUrl
	 *
	 * This implementation returns a URL constructed using the path returned by getLinkPath().
	 * In addition to the default behavior implemented by Site::getPageUrl(), this
	 * method trims the $pageName and converts it to DBKey-format by replacing
	 * spaces with underscores before using it in the URL.
	 *
	 * @since 1.21
	 *
	 * @param string|bool $pageName Page name or false (default: false)
	 *
	 * @return string|bool The URL, with the $1 marker left in place when no
	 *  page name is given; false if getLinkPath() returned false.
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		// NOTE(review): only false is treated as "no link path" here; confirm
		// which value Site::getLinkPath() really uses to signal that.
		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the full file path (ie site url + relative file path).
	 * The path should go at the $1 marker. If the $path
	 * argument is provided, the marker will be replaced by its value;
	 * with the default of false the marker is replaced by an empty string.
	 *
	 * @since 1.21
	 *
	 * @param string|bool $path
	 *
	 * @return string|bool The stored file path with the marker substituted,
	 *  or false if getPath() returned false.
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		// NOTE(review): only false is treated as "no file path" here; confirm
		// which value Site::getPath() really uses to signal that.
		if ( $filePath !== false ) {
			$filePath = str_replace( '$1', $path, $filePath );
		}

		return $filePath;
	}
}