Merge "Clean up Language::markNoConversion()."
[lhc/web/wiklou.git] / includes / site / MediaWikiSite.php
1 <?php
2 /**
3 * Class representing a MediaWiki site.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Site
22 * @license GNU GPL v2+
23 * @author John Erling Blad < jeblad@gmail.com >
24 * @author Daniel Kinzler
25 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
26 */
27
/**
 * Class representing a MediaWiki site.
 *
 * Adds MediaWiki specific behaviour on top of SiteObject: remote title
 * normalization through the site's api.php, and URL construction for
 * pages and files based on the 'page_path' and 'file_path' path types.
 *
 * @since 1.21
 *
 * @ingroup Site
 */
class MediaWikiSite extends SiteObject {

	/** Path type identifier of the file path, as passed to getPath() / setPath(). */
	const PATH_FILE = 'file_path';

	/** Path type identifier of the page path, as passed to getPath() / setPath(). */
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new row of type Site::TYPE_MEDIAWIKI in the sites table,
	 * using the given identifier as its 'global_key'.
	 *
	 * @since 1.21
	 *
	 * @param integer $globalId Value stored in the 'global_key' field of the new row.
	 *        NOTE(review): declared as integer here, but it is used as a key; confirm the type.
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		return SitesTable::singleton()->newRow( array(
			'type' => Site::TYPE_MEDIAWIKI,
			'global_key' => $globalId,
		), true );
	}

	/**
	 * Returns the database form of the given title, i.e. the title
	 * with all spaces replaced by underscores.
	 *
	 * @since 1.21
	 *
	 * @param String $title the target page's title, in normalized form.
	 *
	 * @return String
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title, using the normalization rules of the given site.
	 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
	 *
	 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
	 *         on an external service.
	 *
	 * @note : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
	 *         is normalized using the local normalization rules as implemented by the Title class.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.21
	 *
	 * @param string $pageName
	 *
	 * @return string|false The normalized title, or false if the title is invalid or missing,
	 *         or if the remote site could not be queried or gave an unusable reply.
	 * @throws MWException If $pageName is not a string.
	 */
	public function normalizePageName( $pageName ) {
		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just normalize locally.
			// Note: this may cause results to be inconsistent with the actual normalization
			// used by the respective remote site!
			$title = Title::newFromText( $pageName );

			// Title::newFromText() yields no object for invalid titles; report
			// that as a failure instead of calling a method on a non-object.
			if ( !$title ) {
				return false;
			}

			return $title->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to the bug 40017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
		$pageName = UtfNormal::cleanUp( $pageName );

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			//@todo: options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		// getFileUrl() returns false when no file path is known for this site;
		// concatenating that would yield a bogus "?action=..." URL.
		$apiUrl = $this->getFileUrl( 'api.php' );

		if ( $apiUrl === false ) {
			wfDebugLog( "MediaWikiSite", "no file path known for the external site, can not call api.php" );
			return false;
		}

		$url = $apiUrl . '?' . wfArrayToCgi( $args );

		// Go on call the external site
		//@todo: we need a good way to specify a timeout here.
		$ret = Http::get( $url );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		// The helper is private, so it must be resolved against this class
		// (self::) rather than through late static binding (static::).
		$page = self::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
			return false;
		}

		// This also covers the case where no page record could be extracted at all.
		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get normalization record for a given page title from an API response.
	 *
	 * If the response contains exactly one page, that page is returned directly.
	 * Otherwise the title is followed through the 'normalized', 'converted' and
	 * 'redirects' lists (in that order) and the resulting title is looked up
	 * in the 'pages' list.
	 *
	 * @since 1.21
	 *
	 * @param array $externalData A reply from the API on a external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|false a 'page' structure representing the page identified by $pageTitle,
	 *         or false if the response is malformed or the page can not be identified unambiguously.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}

		$query = $externalData['query'];

		if ( !is_array( $query ) ) {
			return false;
		}

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );

			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// Loop over the three different named structures, that otherwise are similar.
		// Maps the name of each list to the field the current title is matched against.
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exist at all.
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() preserves the original keys, so the result is
			// re-indexed to make the (single) hit reliably available at index 0.
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function( $entry ) use ( $fieldId, $pageTitle ) {
					// Entries lacking the field (e.g. invalid pages without a title) never match.
					return is_array( $entry )
						&& isset( $entry[$fieldId] )
						&& $entry[$fieldId] === $pageTitle;
				}
			) );

			$hitCount = count( $collectedHits );

			// More than one hit is ambiguous, no matter which list we are looking at.
			if ( $hitCount > 1 ) {
				return false;
			}

			if ( $fieldId === 'title' ) {
				// On the pages structure we return the page found, if any.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}

			// Still looping over normalization, conversion or redirects:
			// keep the new page title for later rounds.
			if ( $hitCount === 1 ) {
				if ( !isset( $collectedHits[0]['to'] ) ) {
					return false;
				}

				$pageTitle = $collectedHits[0]['to'];
			}
		}

		// Only reached if the response had no 'pages' list.
		return false;
	}

	/**
	 * @see Site::getLinkPathType
	 * Returns MediaWikiSite::PATH_PAGE
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the relative page path, i.e. the path component of the
	 * page path as extracted by parse_url().
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the relative file path, i.e. the path component of the
	 * file path as extracted by parse_url().
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path (stored under the PATH_PAGE path type).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path (stored under the PATH_FILE path type).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * @see SiteObject::getPageUrl
	 *
	 * This implementation returns a URL constructed using the path returned by getLinkPath().
	 * In addition to the default behaviour implemented by SiteObject::getPageUrl(), this
	 * method converts the $pageName to DBKey-format by replacing spaces with underscores
	 * before using it in the URL.
	 *
	 * If no $pageName is given, the link path is returned with its $1 marker untouched.
	 *
	 * @since 1.21
	 *
	 * @param string|false $pageName Page name (default: false)
	 *
	 * @return string|false The URL, or false if the link path is not known.
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the full file path (ie site url + relative file path).
	 * The path should go at the $1 marker. If the $path
	 * argument is provided, the marker will be replaced by its value.
	 *
	 * Note that, unlike getPageUrl(), the marker is also replaced when $path
	 * is left at its default: it is then substituted by the empty string.
	 *
	 * @since 1.21
	 *
	 * @param string|false $path
	 *
	 * @return string|false The URL, or false if the file path is not known.
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		if ( $filePath === false ) {
			return false;
		}

		// A false $path is spelled out as the empty string it has always been cast to.
		return str_replace( '$1', $path === false ? '' : $path, $filePath );
	}

}