fix some spacing
[lhc/web/wiklou.git] / includes / site / MediaWikiSite.php
1 <?php
2 /**
3 * Class representing a MediaWiki site.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Site
22 * @license GNU GPL v2+
23 * @author John Erling Blad < jeblad@gmail.com >
24 * @author Daniel Kinzler
25 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
26 */
27
28 /**
29 * Class representing a MediaWiki site.
30 *
31 * @since 1.21
32 *
33 * @ingroup Site
34 */
class MediaWikiSite extends Site {

	const PATH_FILE = 'file_path';
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new site object and assigns it the given global identifier.
	 *
	 * @since 1.21
	 * @deprecated Just use the constructor or the factory Site::newForType
	 *
	 * @param string $globalId Global identifier handed to setGlobalId()
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		$site = new static();
		$site->setGlobalId( $globalId );
		return $site;
	}

	/**
	 * Constructor.
	 *
	 * @since 1.21
	 *
	 * @param string $type Site type passed on to the parent constructor
	 */
	public function __construct( $type = self::TYPE_MEDIAWIKI ) {
		parent::__construct( $type );
	}

	/**
	 * Returns the database form of the given title, i.e. the title with
	 * every space replaced by an underscore.
	 *
	 * @since 1.21
	 *
	 * @param String $title the target page's title, in normalized form.
	 *
	 * @return String
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title, using the normalization rules of the given site.
	 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
	 *
	 * @note This actually makes an API request to the remote site, so beware that this function is slow and depends
	 *       on an external service.
	 *
	 * @note If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
	 *       is normalized using the local normalization rules as implemented by the Title class.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.21
	 *
	 * @param string $pageName
	 *
	 * @return string|boolean The normalized title, or false if the request failed, the reply
	 *         could not be interpreted, or the page is missing or invalid.
	 * @throws MWException If $pageName is not a string
	 */
	public function normalizePageName( $pageName ) {
		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just normalize locally.
			// Note: this may cause results to be inconsistent with the actual normalization
			// used by the respective remote site!
			$t = Title::newFromText( $pageName );

			// No Title object means the text could not be parsed as a title; report
			// that the same way as an 'invalid' marker from the remote API.
			if ( !$t ) {
				return false;
			}

			return $t->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to the bug 40017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
		$pageName = UtfNormal::cleanUp( $pageName );

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			//@todo: options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		$url = $this->getFileUrl( 'api.php' ) . '?' . wfArrayToCgi( $args );

		// Go on and call the external site
		//@todo: we need a good way to specify a timeout here.
		$ret = Http::get( $url );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		$page = static::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
			return false;
		}

		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get normalization record for a given page title from an API response.
	 *
	 * The title is followed through the 'normalized', 'converted' and 'redirects'
	 * lists of the reply (in that order) before the matching entry is looked up
	 * in the 'pages' list.
	 *
	 * @since 1.21
	 *
	 * @param array $externalData A reply from the API on a external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|boolean a 'page' structure representing the page identified by $pageTitle,
	 *         or false if no unambiguous record could be found.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}

		$query = $externalData['query'];

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// What follows is only used during internal testing, as it is assumed
		// a more optimal (and lossfree) storage.

		// Loop over the four differently named structures, that otherwise are similar.
		// The value is the name of the field that is compared against the page title.
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exist at all.
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() preserves keys, so re-index afterwards; otherwise a
			// single hit that was not the first list entry could not be read at [0].
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function( $a ) use ( $fieldId, $pageTitle ) {
					// Entries lacking the field (e.g. invalid titles) simply do not match.
					return is_array( $a ) && isset( $a[$fieldId] ) && $a[$fieldId] === $pageTitle;
				}
			) );

			$hitCount = count( $collectedHits );

			// More than one match is ambiguous, whichever structure we are in.
			if ( $hitCount > 1 ) {
				return false;
			}

			if ( $fieldId === 'title' ) {
				// On the pages structure: either we found the record or there is none.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}

			// Still looping over normalization, conversion or redirects:
			// keep the new page title for later rounds.
			if ( $hitCount === 1 ) {
				if ( !isset( $collectedHits[0]['to'] ) ) {
					return false;
				}
				$pageTitle = $collectedHits[0]['to'];
			}
		}

		// Only reached when the reply has no 'pages' structure.
		return false;
	}

	/**
	 * @see Site::getLinkPathType
	 * Returns MediaWikiSite::PATH_PAGE
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the relative page path, i.e. the path component (as extracted
	 * by parse_url) of the path stored for PATH_PAGE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the relative file path, i.e. the path component (as extracted
	 * by parse_url) of the path stored for PATH_FILE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path (stored under PATH_PAGE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path (stored under PATH_FILE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * @see Site::getPageUrl
	 *
	 * This implementation returns a URL constructed using the path returned by getLinkPath().
	 * In addition to the default behavior implemented by Site::getPageUrl(), this
	 * method converts the $pageName to DBKey-format by replacing spaces with underscores
	 * before using it in the URL.
	 *
	 * @since 1.21
	 *
	 * @param string|boolean $pageName Page name or false (default: false)
	 *
	 * @return string|boolean The URL, or false if getLinkPath() returned false
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			// Surrounding whitespace is dropped before converting to DB key form.
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the full file path (ie site url + relative file path).
	 * The path should go at the $1 marker. If the $path
	 * argument is provided, the marker will be replaced by its value.
	 *
	 * @since 1.21
	 *
	 * @param string|boolean $path
	 *
	 * @return string|boolean The file URL; a false result from getPath() is passed through unchanged
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		if ( $filePath !== false ) {
			$filePath = str_replace( '$1', $path, $filePath );
		}

		return $filePath;
	}

}