Merge "Remove a bunch of trailing spaces and unneeded newlines"
[lhc/web/wiklou.git] / includes / site / MediaWikiSite.php
1 <?php
2
/**
 * Class representing a MediaWiki site.
 *
 * @since 1.21
 *
 * @file
 * @ingroup Site
 *
 * @licence GNU GPL v2+
 * @author John Erling Blad < jeblad@gmail.com >
 * @author Daniel Kinzler
 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
 */
class MediaWikiSite extends SiteObject {

	const PATH_FILE = 'file_path';
	const PATH_PAGE = 'page_path';

	/**
	 * Creates a new row in the sites table of type Site::TYPE_MEDIAWIKI
	 * with the given value as its global key.
	 *
	 * @since 1.21
	 *
	 * @param integer $globalId Value stored in the row's 'global_key' field.
	 *
	 * @return MediaWikiSite
	 */
	public static function newFromGlobalId( $globalId ) {
		return SitesTable::singleton()->newRow( array(
			'type' => Site::TYPE_MEDIAWIKI,
			'global_key' => $globalId,
		), true );
	}

	/**
	 * Returns the database form of the given title, i.e. the title with
	 * every space replaced by an underscore.
	 *
	 * @since 1.21
	 *
	 * @param string $title the target page's title, in normalized form.
	 *
	 * @return string
	 */
	public function toDBKey( $title ) {
		return str_replace( ' ', '_', $title );
	}

	/**
	 * Returns the normalized form of the given page title, using the normalization rules of the given site.
	 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
	 *
	 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
	 *         on an external service.
	 *
	 * @note : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
	 *         is normalized using the local normalization rules as implemented by the Title class.
	 *
	 * @see Site::normalizePageName
	 *
	 * @since 1.21
	 *
	 * @param string $pageName
	 *
	 * @return string|false The normalized title, or false if the title could not be
	 *         normalized (invalid title, failed request, bad response, missing page).
	 * @throws MWException If $pageName is not a string.
	 */
	public function normalizePageName( $pageName ) {
		// Check if we have strings as arguments.
		if ( !is_string( $pageName ) ) {
			throw new MWException( '$pageName must be a string' );
		}

		if ( defined( 'MW_PHPUNIT_TEST' ) ) {
			// If the code is under test, don't call out to other sites, just normalize locally.
			// Note: this may cause results to be inconsistent with the actual normalization
			// used by the respective remote site!
			$t = Title::newFromText( $pageName );

			// No Title object means the text could not be parsed as a title;
			// report that like every other failure instead of dying on a method call on null.
			if ( !( $t instanceof Title ) ) {
				return false;
			}

			return $t->getPrefixedText();
		}

		// Make sure the string is normalized into NFC (due to the bug 40017)
		// but do nothing to the whitespaces, that should work appropriately.
		// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
		$pageName = UtfNormal::cleanUp( $pageName );

		// Build the args for the specific call
		$args = array(
			'action' => 'query',
			'prop' => 'info',
			'redirects' => true,
			'converttitles' => true,
			'format' => 'json',
			'titles' => $pageName,
			//@todo: options for maxlag and maxage
			// Note that maxlag will lead to a long delay before a reply is made,
			// but that maxage can avoid the extreme delay. On the other hand
			// maxage could be nice to use anyhow as it stops unnecessary requests.
			// Also consider smaxage if maxage is used.
		);

		$url = $this->getFileUrl( 'api.php' ) . '?' . wfArrayToCgi( $args );

		// Go on call the external site
		//@todo: we need a good way to specify a timeout here.
		$ret = Http::get( $url );

		if ( $ret === false ) {
			wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
			return false;
		}

		$data = FormatJson::decode( $ret, true );

		if ( !is_array( $data ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
			return false;
		}

		// self:: rather than static:: because the helper is private: late static binding
		// would resolve to a subclass and fail if that subclass declared its own
		// method with the same name.
		$page = self::extractPageRecord( $data, $pageName );

		if ( isset( $page['missing'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> returned a missing page title! " . $ret );
			return false;
		}

		// This also covers the case where no page record could be extracted ($page === false).
		if ( !isset( $page['title'] ) ) {
			wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
			return false;
		}

		return $page['title'];
	}

	/**
	 * Get normalization record for a given page title from an API response.
	 *
	 * The title is followed through the 'normalized', 'converted' and 'redirects'
	 * lists of the response (in that order) before it is looked up in 'pages'.
	 *
	 * @since 1.21
	 *
	 * @param array $externalData A reply from the API on a external server.
	 * @param string $pageTitle Identifies the page at the external site, needing normalization.
	 *
	 * @return array|false a 'page' structure representing the page identified by $pageTitle,
	 *         or false if no unambiguous page record could be found.
	 */
	private static function extractPageRecord( $externalData, $pageTitle ) {
		// Make initial checks and return if prerequisites are not met.
		if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
			return false;
		}

		$query = $externalData['query'];

		// If there is a special case with only one returned page
		// we can cheat, and only return
		// the single page in the "pages" substructure.
		if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
			$pages = array_values( $query['pages'] );
			if ( count( $pages ) === 1 ) {
				return $pages[0];
			}
		}

		// What follows is only used during internal testing, as it is assumed
		// a more optimal (and lossfree) storage.

		// Loop over the three different named structures, that otherwise are similar.
		// Key: name of the list in the response; value: field to compare the title against.
		$structs = array(
			'normalized' => 'from',
			'converted' => 'from',
			'redirects' => 'from',
			'pages' => 'title'
		);

		foreach ( $structs as $listId => $fieldId ) {
			// Check if the substructure exist at all.
			if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
				continue;
			}

			// Filter the substructure down to what we actually are using.
			// array_filter() keeps the original keys, so the result is re-indexed;
			// otherwise a single hit is not guaranteed to sit at index 0.
			$collectedHits = array_values( array_filter(
				$query[$listId],
				function( $a ) use ( $fieldId, $pageTitle ) {
					return isset( $a[$fieldId] ) && $a[$fieldId] === $pageTitle;
				}
			) );
			$hitCount = count( $collectedHits );

			if ( $fieldId === 'title' ) {
				// On the pages structure: exactly one match is the answer,
				// anything else (none or ambiguous) is a failure.
				return $hitCount === 1 ? $collectedHits[0] : false;
			}

			// Still looping over normalization, conversion or redirects:
			// keep the new page title for later rounds.
			if ( $hitCount > 1 ) {
				// Ambiguous mapping.
				return false;
			}

			if ( $hitCount === 1 ) {
				if ( !isset( $collectedHits[0]['to'] ) ) {
					// Malformed entry without a target title.
					return false;
				}
				$pageTitle = $collectedHits[0]['to'];
			}
		}

		// Reached when the response has no usable 'pages' list.
		return false;
	}

	/**
	 * @see Site::getLinkPathType
	 * Returns MediaWikiSite::PATH_PAGE
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getLinkPathType() {
		return self::PATH_PAGE;
	}

	/**
	 * Returns the relative page path, i.e. the path component of the
	 * path stored under MediaWikiSite::PATH_PAGE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativePagePath() {
		return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
	}

	/**
	 * Returns the relative file path, i.e. the path component of the
	 * path stored under MediaWikiSite::PATH_FILE.
	 *
	 * @since 1.21
	 *
	 * @return string
	 */
	public function getRelativeFilePath() {
		return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
	}

	/**
	 * Sets the page path (stored under MediaWikiSite::PATH_PAGE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setPagePath( $path ) {
		$this->setPath( self::PATH_PAGE, $path );
	}

	/**
	 * Sets the file path (stored under MediaWikiSite::PATH_FILE).
	 *
	 * @since 1.21
	 *
	 * @param string $path
	 */
	public function setFilePath( $path ) {
		$this->setPath( self::PATH_FILE, $path );
	}

	/**
	 * @see Site::getPageUrl
	 *
	 * This implementation returns a URL constructed using the path returned by getLinkPath().
	 * In addition to the default behaviour implemented by SiteObject::getPageUrl(), this
	 * method converts the $pageName to DBKey-format by replacing spaces with underscores
	 * before using it in the URL.
	 *
	 * @since 1.21
	 *
	 * @param string|false $pageName Page to link to; if false, the link path is
	 *        returned with its $1 marker left in place.
	 *
	 * @return string|false false if no link path is available.
	 */
	public function getPageUrl( $pageName = false ) {
		$url = $this->getLinkPath();

		if ( $url === false ) {
			return false;
		}

		if ( $pageName !== false ) {
			$pageName = $this->toDBKey( trim( $pageName ) );
			$url = str_replace( '$1', wfUrlencode( $pageName ), $url );
		}

		return $url;
	}

	/**
	 * Returns the full file path (ie site url + relative file path).
	 * The path should go at the $1 marker; the marker is replaced by the value
	 * of the $path argument. If $path is omitted (false), the marker is replaced
	 * by the empty string, i.e. removed.
	 *
	 * @since 1.21
	 *
	 * @param string|false $path
	 *
	 * @return string|false false if no file path is set for this site.
	 */
	public function getFileUrl( $path = false ) {
		$filePath = $this->getPath( self::PATH_FILE );

		if ( $filePath !== false ) {
			// Spell out what str_replace() would otherwise do by coercing false to ''.
			$replacement = $path === false ? '' : $path;
			$filePath = str_replace( '$1', $replacement, $filePath );
		}

		return $filePath;
	}

}