Merge "(bug 35923) tweaks to mediawiki.action.history.diff.css"
[lhc/web/wiklou.git] / includes / site / MediaWikiSite.php
1 <?php
2
3 /**
4 * Class representing a MediaWiki site.
5 *
6 * @since 1.21
7 *
8 * @file
9 * @ingroup Site
10 *
11 * @license GNU GPL v2+
12 * @author John Erling Blad < jeblad@gmail.com >
13 * @author Daniel Kinzler
14 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
15 */
class MediaWikiSite extends SiteObject {

	/** Path type key under which the file (script) path of the site is stored. */
	const PATH_FILE = 'file_path';

	/** Path type key under which the page (article) path of the site is stored. */
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new site row of type Site::TYPE_MEDIAWIKI for the given global id.
	 *
	 * The row is obtained from SitesTable::singleton()->newRow() with the global
	 * id stored in the 'global_key' field.
	 *
	 * @since 1.21
	 *
	 * @param integer $globalId
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		return SitesTable::singleton()->newRow( array(
			'type' => Site::TYPE_MEDIAWIKI,
			'global_key' => $globalId,
		), true );
	}

	/**
	 * Returns the database form of the given title.
	 *
	 * The only transformation applied is replacing spaces with underscores.
	 *
	 * @since 1.21
	 *
	 * @param String $title the target page's title, in normalized form.
	 *
	 * @return String
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title, using the normalization rules of the given site.
	 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
	 *
	 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
	 *         on an external service.
	 *
	 * @note : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
	 *         is normalized using the local normalization rules as implemented by the Title class.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.21
	 *
	 * @param string $pageName
	 *
	 * @return string|false the normalized page title, or false if the title is invalid or missing,
	 *         or if the remote site could not be queried or returned an unusable reply.
	 * @throws MWException if $pageName is not a string
	 */
	public function normalizePageName( $pageName ) {

		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just normalize locally.
			// Note: this may cause results to be inconsistent with the actual normalization used by the respective remote site!

			$t = Title::newFromText( $pageName );

			// No Title object means the name could not be parsed; report it the
			// same way as an invalid title reported by a remote site.
			if ( !$t ) {
				wfDebugLog( "MediaWikiSite", "local normalization failed, invalid page title: $pageName" );
				return false;
			}

			return $t->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to the bug 40017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
		$pageName = UtfNormal::cleanUp( $pageName );

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			//@todo: options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		// Without a file path there is no API endpoint to talk to; do not
		// build (and request) a bogus URL consisting of the query string only.
		$apiUrl = $this->getFileUrl( 'api.php' );

		if ( $apiUrl === false ) {
			wfDebugLog( "MediaWikiSite", "no file path known, can not call external site" );
			return false;
		}

		$url = $apiUrl . '?' . wfArrayToCgi( $args );

		// Go on call the external site
		//@todo: we need a good way to specify a timeout here.
		$ret = Http::get( $url );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		// extractPageRecord() is private, so late static binding has no use here.
		$page = self::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
			return false;
		}

		if ( isset( $page['invalid'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
			return false;
		}

		// This also covers the case where no page record could be extracted at all.
		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get normalization record for a given page title from an API response.
	 *
	 * If the reply contains exactly one page, that page is returned directly.
	 * Otherwise the 'normalized', 'converted' and 'redirects' lists are followed
	 * (in that order) from $pageTitle to the final title, which is then looked up
	 * in the 'pages' list.
	 *
	 * @since 1.21
	 *
	 * @param array $externalData A reply from the API on a external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|false a 'page' structure representing the page identified by $pageTitle,
	 *         or false if the reply is malformed or does not identify exactly one such page.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData )
			|| !isset( $externalData['query'] )
			|| !is_array( $externalData['query'] )
		) {
			return false;
		}

		$query = $externalData['query'];

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// The remainder is only used during internal testing, as it is assumed
		// a more optimal (and lossfree) storage.

		// Loop over the three different named structures, that otherwise are similar
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exist at all.
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() preserves the original keys, so the result is
			// re-indexed to make the (single) hit reliably available at index 0.
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function ( $a ) use ( $fieldId, $pageTitle ) {
					return is_array( $a )
						&& isset( $a[$fieldId] )
						&& $a[$fieldId] === $pageTitle;
				}
			) );

			$hitCount = count( $collectedHits );

			if ( $fieldId === 'from' ) {
				// Still looping over normalization, conversion or redirects:
				// keep the new page title for later rounds.
				if ( $hitCount > 1 ) {
					// Ambiguous mapping, give up.
					return false;
				}

				if ( $hitCount === 1 ) {
					if ( !isset( $collectedHits[0]['to'] ) ) {
						// A mapping without a target can not be followed.
						return false;
					}

					$pageTitle = $collectedHits[0]['to'];
				}
			} else {
				// On the pages structure: exactly one hit is the result,
				// anything else means the page could not be identified.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}
		}

		// Only reached if the reply has no (usable) 'pages' structure.
		return false;
	}

	/**
	 * @see Site::getLinkPathType
	 * Returns self::PATH_PAGE
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the relative page path, that is the path component of the
	 * path stored for self::PATH_PAGE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the relative file path, that is the path component of the
	 * path stored for self::PATH_FILE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path (stored under self::PATH_PAGE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path (stored under self::PATH_FILE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * @see Site::getPageUrl
	 *
	 * This implementation returns a URL constructed using the path returned by getLinkPath().
	 * In addition to the default behaviour implemented by SiteObject::getPageUrl(), this
	 * method converts the $pageName to DBKey-format by replacing spaces with underscores
	 * before using it in the URL.
	 *
	 * If no page name is given, the link path is returned with the $1 marker left in place.
	 *
	 * @since 1.21
	 *
	 * @param string|false $pageName Page name (default: false)
	 *
	 * @return string|false the URL, or false if no link path is known
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the full file path (ie site url + relative file path).
	 * The path should go at the $1 marker, which is replaced by the value
	 * of the $path argument.
	 *
	 * Note that the marker is replaced even if $path is left at its default;
	 * str_replace() then receives false as the replacement value.
	 *
	 * @since 1.21
	 *
	 * @param string|false $path
	 *
	 * @return string|false the file URL, or false if no file path is known
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		if ( $filePath !== false ) {
			$filePath = str_replace( '$1', $path, $filePath );
		}

		return $filePath;
	}

}