4 * Class representing a MediaWiki site.
11 * @license GNU GPL v2+
12 * @author John Erling Blad < jeblad@gmail.com >
13 * @author Daniel Kinzler
14 * @author Jeroen De Dauw < jeroendedauw@gmail.com >
16 class MediaWikiSite
extends SiteObject
{
18 const PATH_FILE
= 'file_path';
19 const PATH_PAGE
= 'page_path';
24 * @param integer $globalId
26 * @return MediaWikiSite
28 public static function newFromGlobalId( $globalId ) {
// Asks the SitesTable singleton for a new row whose 'type' is
// Site::TYPE_MEDIAWIKI and whose 'global_key' is the supplied $globalId,
// and returns whatever newRow() produces.
// NOTE(review): the end of the newRow() call is not visible in this listing;
// confirm whether further arguments follow the array.
29 return SitesTable
::singleton()->newRow( array(
30 'type' => Site
::TYPE_MEDIAWIKI
,
31 'global_key' => $globalId,
36 * Returns the database form of the given title.
40 * @param String $title the target page's title, in normalized form.
/**
 * Returns the database form of the given title.
 *
 * The only transformation applied is replacing every space with an
 * underscore; the title is otherwise returned unchanged.
 *
 * @param string $title The target page's title, in normalized form.
 *
 * @return string The title with spaces replaced by underscores.
 */
public function toDBKey( $title ) {
	return str_replace( ' ', '_', $title );
}
49 * Returns the normalized form of the given page title, using the normalization rules of the given site.
50 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
52 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
53 * on an external service.
55 * @note : If MW_PHPUNIT_TEST is defined or $egWBRemoteTitleNormalization is set to false, the call to the
56 * external site is skipped, and the title is normalized using the local normalization rules as
57 * implemented by the Title class.
59 * @see Site::normalizePageName
63 * @param string $pageName
68 public function normalizePageName( $pageName ) {
69 global $egWBRemoteTitleNormalization;
// NOTE(review): this global is declared but not referenced in the lines
// visible here, although the method's documentation says that setting it to
// false should skip the remote call. Confirm whether the local-normalization
// branch below is meant to test it as well.
71 // Only string page names are accepted; anything else is a caller error.
72 if ( !is_string( $pageName ) ) {
73 throw new MWException( '$pageName must be a string' );
76 // Test-mode short circuit: PHPUnit runs never contact the external site.
77 if ( defined( 'MW_PHPUNIT_TEST' ) ) {
78 // If the code is under test, don't call out to other sites, just normalize locally.
79 // Note: this may cause results to be inconsistent with the actual normalization used by the respective remote site!
// NOTE(review): no check on $t is visible before it is dereferenced; confirm
// that Title::newFromText() cannot return a non-object for this input.
81 $t = Title
::newFromText( $pageName );
82 return $t->getPrefixedText();
85 // Make sure the string is normalized into NFC (due to the bug 40017)
86 // but do nothing to the whitespaces, that should work appropriately.
87 // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
88 $pageName = UtfNormal
::cleanUp( $pageName );
90 // Build the query arguments for the API call.
95 'converttitles' => true,
97 'titles' => $pageName,
98 //@todo: options for maxlag and maxage
99 // Note that maxlag will lead to a long delay before a reply is made,
100 // but that maxage can avoid the extreme delay. On the other hand
101 // maxage could be nice to use anyhow as it stops unnecessary requests.
102 // Also consider smaxage if maxage is used.
// The request goes to api.php under this site's file URL, with $args
// serialized as the query string.
105 $url = $this->getFileUrl( 'api.php' ) . '?' . wfArrayToCgi( $args );
107 // Go on and call the external site.
108 //@todo: we need a good way to specify a timeout here.
109 $ret = Http
::get( $url );
// A strict false from Http::get() is treated as a failed request and logged.
112 if ( $ret === false ) {
113 wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
// The response body is decoded as JSON into an associative array.
117 $data = FormatJson
::decode( $ret, true );
// Anything that did not decode to an array is logged as bad JSON.
119 if ( !is_array( $data ) ) {
120 wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
// Pick the record describing $pageName out of the decoded response.
// NOTE(review): extractPageRecord() is private, so static:: gains nothing
// over self:: here; confirm late static binding is intended.
124 $page = static::extractPageRecord( $data, $pageName );
// Records flagged 'missing' or 'invalid', or lacking a 'title', are logged.
126 if ( isset( $page['missing'] ) ) {
127 wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
131 if ( isset( $page['invalid'] ) ) {
132 wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
136 if ( !isset( $page['title'] ) ) {
137 wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
// The remote site's title for the page is the normalized name.
141 return $page['title'];
146 * Get normalization record for a given page title from an API response.
150 * @param array $externalData A reply from the API on a external server.
151 * @param string $pageTitle Identifies the page at the external site, needing normalization.
153 * @return array|false a 'page' structure representing the page identified by $pageTitle.
155 private static function extractPageRecord( $externalData, $pageTitle ) {
156 // Special case: if only one page was returned
157 // we can cheat, and only return
158 // the single page in the "pages" substructure.
159 if ( isset( $externalData['query']['pages'] ) ) {
160 $pages = array_values( $externalData['query']['pages'] );
161 if ( count( $pages) === 1 ) {
165 // This is only used during internal testing, as it is assumed
166 // a more optimal (and lossless) storage.
167 // Make initial checks and return if prerequisites are not met.
168 if ( !is_array( $externalData ) ||
!isset( $externalData['query'] ) ) {
171 // Loop over the three differently named structures, which are otherwise similar
173 'normalized' => 'from',
174 'converted' => 'from',
175 'redirects' => 'from',
178 foreach ( $structs as $listId => $fieldId ) {
179 // Check if the substructure exists at all.
180 if ( !isset( $externalData['query'][$listId] ) ) {
183 // Filter the substructure down to the entries whose $fieldId value
// is exactly the title currently being followed.
184 $collectedHits = array_filter(
185 array_values( $externalData['query'][$listId] ),
186 function( $a ) use ( $fieldId, $pageTitle ) {
187 return $a[$fieldId] === $pageTitle;
190 // If still looping over normalization, conversion or redirects,
191 // then we need to keep the new page title for later rounds.
192 if ( $fieldId === 'from' && is_array( $collectedHits ) ) {
193 switch ( count( $collectedHits ) ) {
// NOTE(review): array_filter() preserves the keys of its input, so index 0
// exists only when the first list entry was the one that matched. Confirm,
// or re-index the hits before reading [0].
197 $pageTitle = $collectedHits[0]['to'];
203 // If on the pages structure we should prepare for returning.
204 elseif ( $fieldId === 'title' && is_array( $collectedHits ) ) {
205 switch ( count( $collectedHits ) ) {
// array_shift() returns the first remaining hit regardless of its key.
209 return array_shift( $collectedHits );
215 // should never be here
220 * @see Site::getLinkPathType
221 * Returns MediaWikiSite::PATH_PAGE
/**
 * @see Site::getLinkPathType
 *
 * @return string The value of this class's PATH_PAGE constant.
 */
public function getLinkPathType() {
	return self::PATH_PAGE;
}
232 * Returns the relative page path.
/**
 * Returns the relative page path.
 *
 * @return mixed Whatever parse_url() yields for the PHP_URL_PATH component
 *  of the path stored under self::PATH_PAGE.
 */
public function getRelativePagePath() {
	// Only the path component of the stored page path is of interest.
	$pagePath = $this->getPath( self::PATH_PAGE );

	return parse_url( $pagePath, PHP_URL_PATH );
}
243 * Returns the relative file path.
/**
 * Returns the relative file path.
 *
 * @return mixed Whatever parse_url() yields for the PHP_URL_PATH component
 *  of the path stored under self::PATH_FILE.
 */
public function getRelativeFilePath() {
	// Only the path component of the stored file path is of interest.
	$filePath = $this->getPath( self::PATH_FILE );

	return parse_url( $filePath, PHP_URL_PATH );
}
254 * Sets the relative page path.
258 * @param string $path
/**
 * Sets the relative page path.
 *
 * @param string $path Path to store under self::PATH_PAGE.
 */
public function setPagePath( $path ) {
	$this->setPath( self::PATH_PAGE, $path );
}
265 * Sets the relative file path.
269 * @param string $path
/**
 * Sets the relative file path.
 *
 * @param string $path Path to store under self::PATH_FILE.
 */
public function setFilePath( $path ) {
	$this->setPath( self::PATH_FILE, $path );
}
276 * @see Site::getPagePath
278 * This implementation returns a URL constructed using the path returned by getLinkPath().
279 * In addition to the default behaviour implemented by SiteObject::getPageUrl(), this
280 * method converts the $pageName to DBKey-format by replacing spaces with underscores
281 * before using it in the URL.
285 * @param string|bool $pageName Page name (default: false)
289 public function getPageUrl( $pageName = false ) {
// Start from the link path; the '$1' marker in it stands for the page name.
290 $url = $this->getLinkPath();
// A strict false from getLinkPath() is handled separately before any substitution.
292 if ( $url === false ) {
// Only substitute when a page name was actually supplied.
296 if ( $pageName !== false ) {
// Trim surrounding whitespace, then turn spaces into underscores via toDBKey().
297 $pageName = $this->toDBKey( trim( $pageName ) );
// URL-encode the DB key and put it in place of the '$1' marker.
298 $url = str_replace( '$1', wfUrlencode( $pageName ), $url ) ;
305 * Returns the full file path (ie site url + relative file path).
306 * The path should go at the $1 marker. If the $path
307 * argument is provided, the marker will be replaced by its value.
311 * @param string|false $path
315 public function getFileUrl( $path = false ) {
316 $filePath = $this->getPath( self
::PATH_FILE
);
318 if ( $filePath !== false ) {
319 $filePath = str_replace( '$1', $path, $filePath );