- // Make sure the string is normalized into NFC (due to bug 40017),
- // but leave the whitespace untouched; that should work appropriately.
- // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
- $pageName = UtfNormal::cleanUp( $pageName );
-
- // Build the args for the specific call
- $args = array(
- 'action' => 'query',
- 'prop' => 'info',
- 'redirects' => true,
- 'converttitles' => true,
- 'format' => 'json',
- 'titles' => $pageName,
- // @todo options for maxlag and maxage
- // Note that maxlag will lead to a long delay before a reply is made,
- // but that maxage can avoid the extreme delay. On the other hand
- // maxage could be nice to use anyhow as it stops unnecessary requests.
- // Also consider smaxage if maxage is used.
- );
-
- $url = wfAppendQuery( $this->getFileUrl( 'api.php' ), $args );
-
- // Go on call the external site
- // @todo we need a good way to specify a timeout here.
- $ret = Http::get( $url );
- }
-
- if ( $ret === false ) {
- wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
- return false;
- }
-
- $data = FormatJson::decode( $ret, true );
-
- if ( !is_array( $data ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
- return false;
- }
-
- $page = static::extractPageRecord( $data, $pageName );
-
- if ( isset( $page['missing'] ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! "
- . $ret );
- return false;
- }
-
- if ( isset( $page['invalid'] ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! "
- . $ret );
- return false;
- }
-
- if ( !isset( $page['title'] ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
- return false;
- }
-
- return $page['title'];
- }
-
- /**
- * Get normalization record for a given page title from an API response.
- *
- * @since 1.21
- *
- * @param array $externalData A reply from the API on an external server.
- * @param string $pageTitle Identifies the page at the external site, needing normalization.
- *
- * @return array|bool A 'page' structure representing the page identified by $pageTitle.
- */
- private static function extractPageRecord( $externalData, $pageTitle ) {
- // If there is a special case with only one returned page
- // we can cheat, and only return
- // the single page in the "pages" substructure.
- if ( isset( $externalData['query']['pages'] ) ) {
- $pages = array_values( $externalData['query']['pages'] );
- if ( count( $pages ) === 1 ) {
- return $pages[0];
- }
- }
- // This is only used during internal testing, as it is assumed
- // to be a more optimal (and loss-free) storage.
- // Make initial checks and return if the prerequisites are not met.
- if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
- return false;
- }
- // Loop over the three differently named structures, which are otherwise similar
- $structs = array(
- 'normalized' => 'from',
- 'converted' => 'from',
- 'redirects' => 'from',
- 'pages' => 'title'
- );
- foreach ( $structs as $listId => $fieldId ) {
- // Check if the substructure exists at all.
- if ( !isset( $externalData['query'][$listId] ) ) {
- continue;