71a17c821ec8036b868673728c52a7bb3a81d84e
[lhc/web/wiklou.git] / includes / site / MediaWikiSite.php
1 <?php
2
3 /**
4 * Class representing a MediaWiki site.
5 *
6 * @since 1.21
7 *
8 * @file
9 * @ingroup Site
10 *
11 * @licence GNU GPL v2+
12 * @author John Erling Blad < jeblad@gmail.com >
13 * @author Daniel Kinzler
14 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
15 */
16 class MediaWikiSite extends SiteObject {
17
18 const PATH_FILE = 'file_path';
19 const PATH_PAGE = 'page_path';
20
/**
 * Creates a new site row of type Site::TYPE_MEDIAWIKI carrying the given global key.
 *
 * The row is obtained from SitesTable::singleton()->newRow(); the second argument
 * passed to newRow() is true (NOTE(review): presumably requests default field
 * values -- confirm against SitesTable::newRow).
 *
 * @since 1.21
 *
 * @param integer $globalId Value stored in the row's 'global_key' field.
 *
 * @return MediaWikiSite
 */
public static function newFromGlobalId( $globalId ) {
	$fields = array(
		'type' => Site::TYPE_MEDIAWIKI,
		'global_key' => $globalId,
	);

	$table = SitesTable::singleton();

	return $table->newRow( $fields, true );
}
34
/**
 * Converts a page title to its database-key form by turning every
 * space into an underscore. No other normalization is applied here.
 *
 * @since 1.21
 *
 * @param String $title the target page's title, in normalized form.
 *
 * @return String the title with spaces replaced by underscores.
 */
public function toDBKey( $title ) {
	$dbKey = str_replace( ' ', '_', $title );

	return $dbKey;
}
47
/**
 * Returns the normalized form of the given page title, using the normalization rules of the given site.
 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
 *
 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
 *         on an external service.
 *
 * @note : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title is normalized
 *         using the local normalization rules as implemented by the Title class. That constant is the only
 *         switch this implementation consults.
 *
 * @see Site::normalizePageName
 *
 * @since 1.21
 *
 * @param string $pageName
 *
 * @return string|false The normalized title, or false if the title could not be normalized
 *         (request failed, bad JSON, missing/invalid page, or no title in the reply).
 * @throws MWException If $pageName is not a string.
 */
public function normalizePageName( $pageName ) {
	// Check if we have strings as arguments.
	if ( !is_string( $pageName ) ) {
		throw new MWException( '$pageName must be a string' );
	}

	if ( defined( 'MW_PHPUNIT_TEST' ) ) {
		// If the code is under test, don't call out to other sites, just normalize locally.
		// Note: this may cause results to be inconsistent with the actual normalization
		// used by the respective remote site!
		$t = Title::newFromText( $pageName );

		// Title::newFromText() does not yield a Title object for text it cannot parse;
		// report that the same way the remote path reports an invalid title.
		if ( !( $t instanceof Title ) ) {
			wfDebugLog( "MediaWikiSite", "could not normalize invalid page title locally: $pageName" );
			return false;
		}

		return $t->getPrefixedText();
	}

	// Make sure the string is normalized into NFC (due to the bug 40017)
	// but do nothing to the whitespaces, that should work appropriately.
	// @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
	$pageName = UtfNormal::cleanUp( $pageName );

	// Build the args for the specific call
	$args = array(
		'action' => 'query',
		'prop' => 'info',
		'redirects' => true,
		'converttitles' => true,
		'format' => 'json',
		'titles' => $pageName,
		//@todo: options for maxlag and maxage
		// Note that maxlag will lead to a long delay before a reply is made,
		// but that maxage can avoid the extreme delay. On the other hand
		// maxage could be nice to use anyhow as it stops unnecessary requests.
		// Also consider smaxage if maxage is used.
	);

	// getFileUrl() hands back false when no file path is known for this site;
	// without it there is no API endpoint to talk to.
	$apiUrl = $this->getFileUrl( 'api.php' );

	if ( $apiUrl === false ) {
		wfDebugLog( "MediaWikiSite", "no file path known, cannot build API url for: $pageName" );
		return false;
	}

	$url = $apiUrl . '?' . wfArrayToCgi( $args );

	// Go on call the external site
	//@todo: we need a good way to specify a timeout here.
	$ret = Http::get( $url );

	if ( $ret === false ) {
		wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
		return false;
	}

	$data = FormatJson::decode( $ret, true );

	if ( !is_array( $data ) ) {
		wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
		return false;
	}

	// The helper is private, so bind to this class rather than late-static-bind.
	$page = self::extractPageRecord( $data, $pageName );

	if ( isset( $page['missing'] ) ) {
		wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
		return false;
	}

	if ( isset( $page['invalid'] ) ) {
		wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
		return false;
	}

	// Also covers the case where no page record could be extracted at all.
	if ( !isset( $page['title'] ) ) {
		wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
		return false;
	}

	return $page['title'];
}
143
144
/**
 * Get normalization record for a given page title from an API response.
 *
 * The title is followed through the 'normalized', 'converted' and 'redirects'
 * lists of the reply (each mapping 'from' to 'to'), and the entry of 'pages'
 * whose 'title' equals the resulting title is returned.
 *
 * @since 1.21
 *
 * @param array $externalData A reply from the API on a external server.
 * @param string $pageTitle Identifies the page at the external site, needing normalization.
 *
 * @return array|false a 'page' structure representing the page identified by $pageTitle,
 *         or false if the reply is malformed or the page cannot be identified unambiguously.
 */
private static function extractPageRecord( $externalData, $pageTitle ) {
	// Make initial checks and return if prerequisites are not met.
	if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
		return false;
	}

	$query = $externalData['query'];

	// If there is a special case with only one returned page
	// we can cheat, and only return
	// the single page in the "pages" substructure.
	if ( isset( $query['pages'] ) && is_array( $query['pages'] ) ) {
		$pages = array_values( $query['pages'] );
		if ( count( $pages ) === 1 ) {
			return $pages[0];
		}
	}

	// Loop over the four differently named structures, that otherwise are similar.
	// The value names the field that is compared against the current page title.
	$structs = array(
		'normalized' => 'from',
		'converted' => 'from',
		'redirects' => 'from',
		'pages' => 'title'
	);

	foreach ( $structs as $listId => $fieldId ) {
		// Check if the substructure exists at all (and is a list we can scan).
		if ( !isset( $query[$listId] ) || !is_array( $query[$listId] ) ) {
			continue;
		}

		// Filter the substructure down to what we actually are using.
		// array_filter() keeps the original keys, so re-index the result:
		// the single hit is not necessarily stored under key 0.
		$hits = array_values( array_filter(
			$query[$listId],
			function( $entry ) use ( $fieldId, $pageTitle ) {
				return isset( $entry[$fieldId] ) && $entry[$fieldId] === $pageTitle;
			}
		) );

		// More than one match is ambiguous in every structure.
		if ( count( $hits ) > 1 ) {
			return false;
		}

		// If on the pages structure we should prepare for returning.
		if ( $fieldId === 'title' ) {
			return count( $hits ) === 1 ? $hits[0] : false;
		}

		// If still looping over normalization, conversion or redirects,
		// then we need to keep the new page title for later rounds.
		if ( count( $hits ) === 1 ) {
			if ( !isset( $hits[0]['to'] ) ) {
				// A mapping without a target cannot be followed.
				return false;
			}
			$pageTitle = $hits[0]['to'];
		}
	}

	// Only reached when the reply has no usable 'pages' list.
	return false;
}
218
/**
 * @see Site::getLinkPathType
 * Always reports the page path type, i.e. self::PATH_PAGE.
 *
 * @since 1.21
 *
 * @return string
 */
public function getLinkPathType() {
	$pathType = self::PATH_PAGE;

	return $pathType;
}
230
/**
 * Returns the relative page path, i.e. the path component (as extracted
 * by parse_url) of the path registered under self::PATH_PAGE.
 *
 * @since 1.21
 *
 * @return string
 */
public function getRelativePagePath() {
	$pagePath = $this->getPath( self::PATH_PAGE );

	return parse_url( $pagePath, PHP_URL_PATH );
}
241
/**
 * Returns the relative file path, i.e. the path component (as extracted
 * by parse_url) of the path registered under self::PATH_FILE.
 *
 * @since 1.21
 *
 * @return string
 */
public function getRelativeFilePath() {
	$filePath = $this->getPath( self::PATH_FILE );

	return parse_url( $filePath, PHP_URL_PATH );
}
252
/**
 * Sets the relative page path by registering the given path
 * under the self::PATH_PAGE path type.
 *
 * @since 1.21
 *
 * @param string $path
 */
public function setPagePath( $path ) {
	$pathType = self::PATH_PAGE;

	$this->setPath( $pathType, $path );
}
263
/**
 * Sets the relative file path by registering the given path
 * under the self::PATH_FILE path type.
 *
 * @since 1.21
 *
 * @param string $path
 */
public function setFilePath( $path ) {
	$pathType = self::PATH_FILE;

	$this->setPath( $pathType, $path );
}
274
/**
 * @see SiteObject::getPageUrl
 *
 * Builds the URL from the path returned by getLinkPath(). When a page name
 * is given, it is trimmed, converted to DBKey format (spaces become
 * underscores, see toDBKey()), URL-encoded with wfUrlencode() and substituted
 * for the $1 marker. Without a page name the link path is returned as is,
 * marker included.
 *
 * @since 1.21
 *
 * @param string|false $pageName Page to link to, or false to get the raw link path.
 *
 * @return string|false The URL, or false if getLinkPath() returned false.
 */
public function getPageUrl( $pageName = false ) {
	$linkPath = $this->getLinkPath();

	// Nothing to substitute: either there is no link path, or no page was asked for.
	if ( $linkPath === false || $pageName === false ) {
		return $linkPath;
	}

	$dbKey = $this->toDBKey( trim( $pageName ) );

	return str_replace( '$1', wfUrlencode( $dbKey ), $linkPath );
}
303
/**
 * Returns the full file path (ie site url + relative file path).
 * The path should go at the $1 marker, which is replaced by the value
 * of the $path argument. Note that the replacement happens whenever a file
 * path is known, also for the default $path of false.
 *
 * @since 1.21
 *
 * @param string|false $path
 *
 * @return string|false The file URL, or false if no file path is known for this site.
 */
public function getFileUrl( $path = false ) {
	$template = $this->getPath( self::PATH_FILE );

	if ( $template === false ) {
		return $template;
	}

	return str_replace( '$1', $path, $template );
}
324
325 }