X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fsite%2FMediaWikiSite.php;h=6734d5f70c997ea78b7c8a24f148516b227156fe;hb=54d50ef3921dc9f30dc4d863ddc471dc564998e9;hp=029919c49ddd24d28b229018ca2a4fcfa49076ea;hpb=638c4528259b71a5bc90439fad7cd8d110a86b06;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/site/MediaWikiSite.php b/includes/site/MediaWikiSite.php index 029919c49d..e1e7ce69cf 100644 --- a/includes/site/MediaWikiSite.php +++ b/includes/site/MediaWikiSite.php @@ -1,4 +1,7 @@ setGlobalId( $globalId ); - return $site; - } - - /** - * Constructor. - * * @since 1.21 * * @param string $type @@ -77,7 +64,8 @@ class MediaWikiSite extends Site { /** * Returns the normalized form of the given page title, using the * normalization rules of the given site. If the given title is a redirect, - * the redirect weill be resolved and the redirect target is returned. + * the redirect will be resolved and the redirect target is returned. + * Only titles of existing pages will be returned. * * @note This actually makes an API request to the remote site, so beware * that this function is slow and depends on an external service. @@ -92,17 +80,12 @@ class MediaWikiSite extends Site { * * @param string $pageName * - * @return string + * @return string|false The normalized form of the title, + * or false to indicate an invalid title, a missing page, + * or some other kind of error. * @throws MWException */ public function normalizePageName( $pageName ) { - - // Check if we have strings as arguments. - if ( !is_string( $pageName ) ) { - throw new MWException( '$pageName must be a string' ); - } - - // Go on call the external site if ( defined( 'MW_PHPUNIT_TEST' ) ) { // If the code is under test, don't call out to other sites, just // normalize locally. @@ -112,140 +95,17 @@ class MediaWikiSite extends Site { $t = Title::newFromText( $pageName ); return $t->getPrefixedText(); } else { + static $mediaWikiPageNameNormalizer = null; - // Make sure the string is normalized into NFC (due to T42017) - // but do nothing to the whitespaces, that should work appropriately. - // @see https://phabricator.wikimedia.org/T42017 - $pageName = UtfNormal\Validator::cleanUp( $pageName ); - - // Build the args for the specific call - $args = array( - 'action' => 'query', - 'prop' => 'info', - 'redirects' => true, - 'converttitles' => true, - 'format' => 'json', - 'titles' => $pageName, - // @todo options for maxlag and maxage - // Note that maxlag will lead to a long delay before a reply is made, - // but that maxage can avoid the extreme delay. On the other hand - // maxage could be nice to use anyhow as it stops unnecessary requests. - // Also consider smaxage if maxage is used. - ); - - $url = wfAppendQuery( $this->getFileUrl( 'api.php' ), $args ); - - // Go on call the external site - // @todo we need a good way to specify a timeout here. - $ret = Http::get( $url, array(), __METHOD__ ); - } - - if ( $ret === false ) { - wfDebugLog( "MediaWikiSite", "call to external site failed: $url" ); - return false; - } - - $data = FormatJson::decode( $ret, true ); - - if ( !is_array( $data ) ) { - wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret ); - return false; - } - - $page = static::extractPageRecord( $data, $pageName ); - - if ( isset( $page['missing'] ) ) { - wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " - . $ret ); - return false; - } - - if ( isset( $page['invalid'] ) ) { - wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " - . $ret ); - return false; - } - - if ( !isset( $page['title'] ) ) { - wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret ); - return false; - } - - return $page['title']; - } - - /** - * Get normalization record for a given page title from an API response. - * - * @since 1.21 - * - * @param array $externalData A reply from the API on a external server. - * @param string $pageTitle Identifies the page at the external site, needing normalization. - * - * @return array|bool A 'page' structure representing the page identified by $pageTitle. - */ - private static function extractPageRecord( $externalData, $pageTitle ) { - // If there is a special case with only one returned page - // we can cheat, and only return - // the single page in the "pages" substructure. - if ( isset( $externalData['query']['pages'] ) ) { - $pages = array_values( $externalData['query']['pages'] ); - if ( count( $pages ) === 1 ) { - return $pages[0]; + if ( $mediaWikiPageNameNormalizer === null ) { + $mediaWikiPageNameNormalizer = new MediaWikiPageNameNormalizer(); } - } - // This is only used during internal testing, as it is assumed - // a more optimal (and lossfree) storage. - // Make initial checks and return if prerequisites are not meet. - if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) { - return false; - } - // Loop over the tree different named structures, that otherwise are similar - $structs = array( - 'normalized' => 'from', - 'converted' => 'from', - 'redirects' => 'from', - 'pages' => 'title' - ); - foreach ( $structs as $listId => $fieldId ) { - // Check if the substructure exist at all. - if ( !isset( $externalData['query'][$listId] ) ) { - continue; - } - // Filter the substructure down to what we actually are using. - $collectedHits = array_filter( - array_values( $externalData['query'][$listId] ), - function ( $a ) use ( $fieldId, $pageTitle ) { - return $a[$fieldId] === $pageTitle; - } + + return $mediaWikiPageNameNormalizer->normalizePageName( + $pageName, + $this->getFileUrl( 'api.php' ) ); - // If still looping over normalization, conversion or redirects, - // then we need to keep the new page title for later rounds. - if ( $fieldId === 'from' && is_array( $collectedHits ) ) { - switch ( count( $collectedHits ) ) { - case 0: - break; - case 1: - $pageTitle = $collectedHits[0]['to']; - break; - default: - return false; - } - } - // If on the pages structure we should prepare for returning. - elseif ( $fieldId === 'title' && is_array( $collectedHits ) ) { - switch ( count( $collectedHits ) ) { - case 0: - return false; - case 1: - return array_shift( $collectedHits ); - default: - return false; - } - } } - // should never be here - return false; } /**