<?php
+
+use MediaWiki\Site\MediaWikiPageNameNormalizer;
+
/**
* Class representing a MediaWiki site.
*
const PATH_FILE = 'file_path';
const PATH_PAGE = 'page_path';
- /**
- * Creates a new instance of the class this method is called on (late static
- * binding via `new static()`) and assigns it the given global ID.
- *
- * @since 1.21
- * @deprecated since 1.21 Just use the constructor or the factory Site::newForType
- *
- * @param int $globalId Value handed to setGlobalId() on the new instance.
- *   NOTE(review): documented as int here; confirm against what setGlobalId() accepts.
- *
- * @return MediaWikiSite
- */
- public static function newFromGlobalId( $globalId ) {
- $site = new static();
- $site->setGlobalId( $globalId );
- return $site;
- }
-
/**
* Constructor.
*
* @throws MWException
*/
public function normalizePageName( $pageName ) {
-
- // Check that the argument is a string.
- if ( !is_string( $pageName ) ) {
- throw new MWException( '$pageName must be a string' );
- }
-
- // Normalize locally when under test; otherwise call the external site.
if ( defined( 'MW_PHPUNIT_TEST' ) ) {
// If the code is under test, don't call out to other sites, just
// normalize locally.
$t = Title::newFromText( $pageName ); // NOTE(review): $t is used below without a null check; confirm newFromText() cannot return null here
return $t->getPrefixedText();
} else {
+ static $mediaWikiPageNameNormalizer = null; // function-static: created on first use, then reused by later calls
- // Make sure the string is normalized into NFC (due to T42017)
- // but leave the whitespace untouched; that should work appropriately.
- // @see https://phabricator.wikimedia.org/T42017
- $pageName = UtfNormal\Validator::cleanUp( $pageName );
-
- // Build the args for the specific call
- $args = array(
- 'action' => 'query',
- 'prop' => 'info',
- 'redirects' => true,
- 'converttitles' => true,
- 'format' => 'json',
- 'titles' => $pageName,
- // @todo options for maxlag and maxage
- // Note that maxlag will lead to a long delay before a reply is made,
- // but that maxage can avoid the extreme delay. On the other hand
- // maxage could be nice to use anyhow as it stops unnecessary requests.
- // Also consider smaxage if maxage is used.
- );
-
- $url = wfAppendQuery( $this->getFileUrl( 'api.php' ), $args );
-
- // Go on and call the external site
- // @todo we need a good way to specify a timeout here.
- $ret = Http::get( $url, array(), __METHOD__ );
- }
-
- if ( $ret === false ) {
- wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
- return false;
- }
-
- $data = FormatJson::decode( $ret, true );
-
- if ( !is_array( $data ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
- return false;
- }
-
- $page = static::extractPageRecord( $data, $pageName );
-
- if ( isset( $page['missing'] ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! "
- . $ret );
- return false;
- }
-
- if ( isset( $page['invalid'] ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! "
- . $ret );
- return false;
- }
-
- if ( !isset( $page['title'] ) ) {
- wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
- return false;
- }
-
- return $page['title'];
- }
-
- /**
- * Get normalization record for a given page title from an API response.
- *
- * @since 1.21
- *
- * @param array $externalData A reply from the API on an external server.
- * @param string $pageTitle Identifies the page at the external site, needing normalization.
- *
- * @return array|bool A 'page' structure representing the page identified by $pageTitle,
- *   or false if no single matching record could be determined.
- */
- private static function extractPageRecord( $externalData, $pageTitle ) {
- // Special case: if only one page was returned
- // we can cheat, and only return
- // the single page in the "pages" substructure.
- if ( isset( $externalData['query']['pages'] ) ) {
- $pages = array_values( $externalData['query']['pages'] );
- if ( count( $pages ) === 1 ) {
- return $pages[0];
+ if ( $mediaWikiPageNameNormalizer === null ) {
+ $mediaWikiPageNameNormalizer = new MediaWikiPageNameNormalizer();
}
- }
- // This is only used during internal testing, as it is assumed
- // a more optimal (and lossless) storage.
- // Make initial checks and return if prerequisites are not met.
- if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
- return false;
- }
- // Loop over the differently named structures, which are otherwise similar
- $structs = array(
- 'normalized' => 'from',
- 'converted' => 'from',
- 'redirects' => 'from',
- 'pages' => 'title'
- );
- foreach ( $structs as $listId => $fieldId ) {
- // Check if the substructure exists at all.
- if ( !isset( $externalData['query'][$listId] ) ) {
- continue;
- }
- // Filter the substructure down to what we actually are using.
- $collectedHits = array_filter(
- array_values( $externalData['query'][$listId] ),
- function ( $a ) use ( $fieldId, $pageTitle ) {
- return $a[$fieldId] === $pageTitle;
- }
+ // Delegate to the shared normalizer, passing the URL from getFileUrl( 'api.php' ).
+ return $mediaWikiPageNameNormalizer->normalizePageName(
+ $pageName,
+ $this->getFileUrl( 'api.php' )
);
- // If still looping over normalization, conversion or redirects,
- // then we need to keep the new page title for later rounds.
- if ( $fieldId === 'from' && is_array( $collectedHits ) ) {
- switch ( count( $collectedHits ) ) {
- case 0:
- break;
- case 1:
- $pageTitle = $collectedHits[0]['to'];
- break;
- default:
- return false;
- }
- } elseif ( $fieldId === 'title' && is_array( $collectedHits ) ) {
- // If on the pages structure we should prepare for returning.
- switch ( count( $collectedHits ) ) {
- case 0:
- return false;
- case 1:
- return array_shift( $collectedHits );
- default:
- return false;
- }
- }
}
- // should never be here
- return false;
}
/**