<?php
/**
* File for magic words
+ *
* See docs/magicword.txt
*
* @file
/**
* This class encapsulates "magic words" such as #redirect, __NOTOC__, etc.
- * Usage:
- * if (MagicWord::get( 'redirect' )->match( $text ) )
+ *
+ * @par Usage:
+ * @code
+ * if (MagicWord::get( 'redirect' )->match( $text ) ) {
+ * // some code
+ * }
+ * @endcode
*
* Possible future improvements:
* * Simultaneous searching for a number of magic words
* Please avoid reading the data out of one of these objects and then writing
* special case code. If possible, add another match()-like function here.
*
- * To add magic words in an extension, use the LanguageGetMagic hook. For
- * magic words which are also Parser variables, add a MagicWordwgVariableIDs
+ * To add magic words in an extension, use $magicWords in a file listed in
+ * $wgExtensionMessagesFiles[].
+ *
+ * @par Example:
+ * @code
+ * $magicWords = array();
+ *
+ * $magicWords['en'] = array(
+ * 'magicwordkey' => array( 0, 'case_insensitive_magic_word' ),
+ * 'magicwordkey2' => array( 1, 'CASE_sensitive_magic_word2' ),
+ * );
+ * @endcode
+ *
+ * For magic words which are also Parser variables, add a MagicWordwgVariableIDs
* hook. Use string keys.
*
* @ingroup Parser
/**#@+
* @private
*/
- var $mId, $mSynonyms, $mCaseSensitive, $mRegex;
- var $mRegexStart, $mBaseRegex, $mVariableRegex;
- var $mModified, $mFound;
+ var $mId, $mSynonyms, $mCaseSensitive;
+ var $mRegex = '';
+ var $mRegexStart = '';
+ var $mBaseRegex = '';
+ var $mVariableRegex = '';
+ var $mVariableStartToEndRegex = '';
+ var $mModified = false;
+ var $mFound = false;
static public $mVariableIDsInitialised = false;
static public $mVariableIDs = array(
'numberofarticles',
'numberoffiles',
'numberofedits',
+ 'articlepath',
'sitename',
'server',
'servername',
'scriptpath',
+ 'stylepath',
'pagename',
'pagenamee',
'fullpagename',
'revisionday',
'revisionday2',
'revisionmonth',
+ 'revisionmonth1',
'revisionyear',
'revisiontimestamp',
'revisionuser',
/* Array of caching hints for ParserCache */
static public $mCacheTTLs = array (
- 'currentmonth' => 86400,
- 'currentmonth1' => 86400,
- 'currentmonthname' => 86400,
- 'currentmonthnamegen' => 86400,
- 'currentmonthabbrev' => 86400,
- 'currentday' => 3600,
- 'currentday2' => 3600,
- 'currentdayname' => 3600,
- 'currentyear' => 86400,
- 'currenttime' => 3600,
- 'currenthour' => 3600,
- 'localmonth' => 86400,
- 'localmonth1' => 86400,
- 'localmonthname' => 86400,
- 'localmonthnamegen' => 86400,
- 'localmonthabbrev' => 86400,
- 'localday' => 3600,
- 'localday2' => 3600,
- 'localdayname' => 3600,
- 'localyear' => 86400,
- 'localtime' => 3600,
- 'localhour' => 3600,
- 'numberofarticles' => 3600,
- 'numberoffiles' => 3600,
- 'numberofedits' => 3600,
- 'currentweek' => 3600,
- 'currentdow' => 3600,
- 'localweek' => 3600,
- 'localdow' => 3600,
- 'numberofusers' => 3600,
- 'numberofactiveusers' => 3600,
- 'numberofpages' => 3600,
- 'currentversion' => 86400,
- 'currenttimestamp' => 3600,
- 'localtimestamp' => 3600,
- 'pagesinnamespace' => 3600,
- 'numberofadmins' => 3600,
- 'numberofviews' => 3600,
- 'numberingroup' => 3600,
- 'numberofcontribs' => 3600,
+ 'currentmonth' => 86400,
+ 'currentmonth1' => 86400,
+ 'currentmonthname' => 86400,
+ 'currentmonthnamegen' => 86400,
+ 'currentmonthabbrev' => 86400,
+ 'currentday' => 3600,
+ 'currentday2' => 3600,
+ 'currentdayname' => 3600,
+ 'currentyear' => 86400,
+ 'currenttime' => 3600,
+ 'currenthour' => 3600,
+ 'localmonth' => 86400,
+ 'localmonth1' => 86400,
+ 'localmonthname' => 86400,
+ 'localmonthnamegen' => 86400,
+ 'localmonthabbrev' => 86400,
+ 'localday' => 3600,
+ 'localday2' => 3600,
+ 'localdayname' => 3600,
+ 'localyear' => 86400,
+ 'localtime' => 3600,
+ 'localhour' => 3600,
+ 'numberofarticles' => 3600,
+ 'numberoffiles' => 3600,
+ 'numberofedits' => 3600,
+ 'currentweek' => 3600,
+ 'currentdow' => 3600,
+ 'localweek' => 3600,
+ 'localdow' => 3600,
+ 'numberofusers' => 3600,
+ 'numberofactiveusers' => 3600,
+ 'numberofpages' => 3600,
+ 'currentversion' => 86400,
+ 'currenttimestamp' => 3600,
+ 'localtimestamp' => 3600,
+ 'pagesinnamespace' => 3600,
+ 'numberofadmins' => 3600,
+ 'numberofviews' => 3600,
+ 'numberingroup' => 3600,
);
static public $mDoubleUnderscoreIDs = array(
'index',
'noindex',
'staticredirect',
+ 'notitleconvert',
+ 'nocontentconvert',
);
+ static public $mSubstIDs = array(
+ 'subst',
+ 'safesubst',
+ );
static public $mObjects = array();
static public $mDoubleUnderscoreArray = null;
/**#@-*/
- function __construct($id = 0, $syn = '', $cs = false) {
+ function __construct($id = 0, $syn = array(), $cs = false) {
$this->mId = $id;
$this->mSynonyms = (array)$syn;
$this->mCaseSensitive = $cs;
- $this->mRegex = '';
- $this->mRegexStart = '';
- $this->mVariableRegex = '';
- $this->mVariableStartToEndRegex = '';
- $this->mModified = false;
}
/**
* Factory: returns the MagicWord object representing an ID.
* The object is created and loaded on first use and cached in
* self::$mObjects, so later calls with the same ID return the cached
* instance (the method returns by reference).
- * @static
+ *
+ * @param $id string Magic word ID, e.g. 'redirect'
+ *
+ * @return MagicWord
*/
static function &get( $id ) {
- wfProfileIn( __METHOD__ );
- if (!array_key_exists( $id, self::$mObjects ) ) {
+ if ( !isset( self::$mObjects[$id] ) ) {
$mw = new MagicWord();
$mw->load( $id );
self::$mObjects[$id] = $mw;
}
- wfProfileOut( __METHOD__ );
return self::$mObjects[$id];
}
/**
* Get an array of parser variable IDs
+ *
+ * @return array
*/
static function getVariableIDs() {
if ( !self::$mVariableIDsInitialised ) {
- # Deprecated constant definition hook, available for extensions that need it
- $magicWords = array();
- wfRunHooks( 'MagicWordMagicWords', array( &$magicWords ) );
- foreach ( $magicWords as $word ) {
- define( $word, $word );
- }
-
# Get variable IDs
wfRunHooks( 'MagicWordwgVariableIDs', array( &self::$mVariableIDs ) );
self::$mVariableIDsInitialised = true;
return self::$mVariableIDs;
}
- /* Allow external reads of TTL array */
- static function getCacheTTL($id) {
- if (array_key_exists($id,self::$mCacheTTLs)) {
+ /**
+ * Get the array of parser substitution modifier IDs, i.e. the contents
+ * of self::$mSubstIDs ('subst' and 'safesubst' as declared in this class)
+ *
+ * @return array List of magic word ID strings
+ */
+ static function getSubstIDs() {
+ return self::$mSubstIDs;
+ }
+
+ /**
+ * Allow external reads of the TTL array (self::$mCacheTTLs)
+ *
+ * @param $id string Magic word ID used as key in self::$mCacheTTLs, e.g. 'currentday'
+ * @return int The TTL stored for that ID (the table holds 3600 / 86400,
+ *   presumably seconds), or -1 if the ID has no entry
+ */
+ static function getCacheTTL( $id ) {
+ if ( array_key_exists( $id, self::$mCacheTTLs ) ) {
return self::$mCacheTTLs[$id];
} else {
return -1;
}
}
- /** Get a MagicWordArray of double-underscore entities */
+ /**
+ * Get a MagicWordArray of double-underscore entities
+ *
+ * @return MagicWordArray
+ */
static function getDoubleUnderscoreArray() {
if ( is_null( self::$mDoubleUnderscoreArray ) ) {
self::$mDoubleUnderscoreArray = new MagicWordArray( self::$mDoubleUnderscoreIDs );
return self::$mDoubleUnderscoreArray;
}
- # Initialises this object with an ID
+ /**
+ * Empty the self::$mObjects cache of MagicWord instances, so that the
+ * next MagicWord::get() call for any ID constructs and loads a new object.
+ * For use in parser tests
+ */
+ public static function clearCache() {
+ self::$mObjects = array();
+ }
+
+ /**
+ * Initialises this object with an ID
+ *
+ * @param $id
+ */
function load( $id ) {
global $wgContLang;
+ wfProfileIn( __METHOD__ );
$this->mId = $id;
$wgContLang->getMagic( $this );
if ( !$this->mSynonyms ) {
#throw new MWException( "Error: invalid magic word '$id'" );
wfDebugLog( 'exception', "Error: invalid magic word '$id'\n" );
}
+ wfProfileOut( __METHOD__ );
}
/**
* @private
*/
function initRegex() {
- #$variableClass = Title::legalChars();
- # This was used for matching "$1" variables, but different uses of the feature will have
- # different restrictions, which should be checked *after* the MagicWord has been matched,
- # not here. - IMSoP
+ // Sort the synonyms by length, descending, so that the longest synonym
+ // matches in precedence to the shortest
+ $synonyms = $this->mSynonyms;
+ usort( $synonyms, array( $this, 'compareStringLength' ) );
$escSyn = array();
- foreach ( $this->mSynonyms as $synonym )
+ foreach ( $synonyms as $synonym )
// In case a magic word contains /, like that's going to happen;)
$escSyn[] = preg_quote( $synonym, '/' );
$this->mBaseRegex = implode( '|', $escSyn );
"/^(?:{$this->mBaseRegex})$/{$case}" );
}
+ /**
+ * A comparison function that returns -1, 0 or 1 depending on whether the
+ * first string is longer, the same length or shorter than the second
+ * string.
+ *
+ * Used as the usort() callback in initRegex(), where it orders the
+ * synonyms longest first. Lengths are taken with strlen(), i.e. in bytes.
+ *
+ * @param $s1 string
+ * @param $s2 string
+ *
+ * @return int -1 if $s1 is longer, 1 if $s1 is shorter, 0 if both have the same length
+ */
+ function compareStringLength( $s1, $s2 ) {
+ $l1 = strlen( $s1 );
+ $l2 = strlen( $s2 );
+ if ( $l1 < $l2 ) {
+ return 1;
+ } elseif ( $l1 > $l2 ) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
/**
* Gets a regex representing matching the word
+ *
+ * @return string
*/
function getRegex() {
if ($this->mRegex == '' ) {
* Gets the regexp case modifier to use, i.e. i or nothing, to be used if
* one is using MagicWord::getBaseRegex(), otherwise it'll be included in
* the complete expression
+ *
+ * @return string
*/
function getRegexCase() {
if ( $this->mRegex === '' )
/**
* Gets a regex matching the word, if it is at the string start
+ *
+ * @return string
*/
function getRegexStart() {
if ($this->mRegex == '' ) {
/**
* regex without the slashes and what not
+ *
+ * @return string
*/
function getBaseRegex() {
if ($this->mRegex == '') {
/**
* Returns true if the text contains the word
+ *
+ * @param $text string
+ *
* @return bool
*/
function match( $text ) {
- return preg_match( $this->getRegex(), $text );
+ return (bool)preg_match( $this->getRegex(), $text );
}
/**
* Returns true if the text starts with the word
+ *
+ * @param $text string
+ *
* @return bool
*/
function matchStart( $text ) {
- return preg_match( $this->getRegexStart(), $text );
+ return (bool)preg_match( $this->getRegexStart(), $text );
}
/**
* The return code is the matched string, if there's no variable
* part in the regex and the matched variable part ($1) if there
* is one.
+ *
+ * @param $text string
+ *
+ * @return string|null The match as described above, or null if $text does not match
*/
function matchVariableStartToEnd( $text ) {
$matches = array();
$matchcount = preg_match( $this->getVariableStartToEndRegex(), $text, $matches );
if ( $matchcount == 0 ) {
- return NULL;
+ return null;
} else {
# multiple matched parts (variable match); some will be empty because of
# synonyms. The variable will be the second non-empty one so remove any
# empty elements and renumber the remaining ones from 0.
# NOTE(review): array_filter() without a callback drops every falsy
# element, so a captured variable part of "0" is discarded too and the
# whole match would be returned instead -- confirm whether that is intended.
$matches = array_values(array_filter($matches));
- if ( count($matches) == 1 ) { return $matches[0]; }
- else { return $matches[1]; }
+ if ( count($matches) == 1 ) {
+ return $matches[0];
+ } else {
+ return $matches[1];
+ }
}
}
/**
* Returns true if the text matches the word, and alters the
* input string, removing all instances of the word
+ *
+ * @param $text string
+ *
+ * @return bool
*/
function matchAndRemove( &$text ) {
$this->mFound = false;
return $this->mFound;
}
+ /**
+ * @param $text
+ * @return bool
+ */
function matchStartAndRemove( &$text ) {
$this->mFound = false;
$text = preg_replace_callback( $this->getRegexStart(), array( &$this, 'pregRemoveAndRecord' ), $text );
/**
* Used in matchAndRemove()
- * @private
- **/
- function pregRemoveAndRecord( ) {
+ *
+ * @return string
+ */
+ function pregRemoveAndRecord() {
$this->mFound = true;
return '';
}
/**
* Replaces the word with something else
+ *
+ * @param $replacement
+ * @param $subject
+ * @param $limit int
+ *
+ * @return string
*/
- function replace( $replacement, $subject, $limit=-1 ) {
+ function replace( $replacement, $subject, $limit = -1 ) {
$res = preg_replace( $this->getRegex(), StringUtils::escapeRegexReplacement( $replacement ), $subject, $limit );
$this->mModified = !($res === $subject);
return $res;
* Variable handling: {{SUBST:xxx}} style words
* Calls back a function to determine what to replace xxx with
* Input word must contain $1
+ *
+ * @param $text string
+ * @param $callback
+ *
+ * @return string
*/
function substituteCallback( $text, $callback ) {
$res = preg_replace_callback( $this->getVariableRegex(), $callback, $text );
/**
* Matches the word, where $1 is a wildcard
+ *
+ * @return string
*/
function getVariableRegex() {
if ( $this->mVariableRegex == '' ) {
/**
* Matches the entire string, where $1 is a wildcard
+ *
+ * @return string
*/
function getVariableStartToEndRegex() {
if ( $this->mVariableStartToEndRegex == '' ) {
/**
* Accesses the synonym list directly
+ *
+ * @param $i int
+ *
+ * @return string
*/
function getSynonym( $i ) {
return $this->mSynonyms[$i];
}
+ /**
+ * @return array
+ */
function getSynonyms() {
return $this->mSynonyms;
}
/**
* Returns true if the last call to replace() or substituteCallback()
* returned a modified text, otherwise false.
+ *
+ * @return bool
*/
function getWasModified(){
return $this->mModified;
* This method uses the php feature to do several replacements at the same time,
* thereby gaining some efficiency. The result is placed in the out variable
* $result. The return value is true if something was replaced.
- * @static
- **/
+ * @todo Should this be static? It doesn't seem to be used at all
+ *
+ * @param $magicarr
+ * @param $subject
+ * @param $result
+ *
+ * @return bool
+ */
function replaceMultiple( $magicarr, $subject, &$result ){
$search = array();
$replace = array();
/**
* Adds all the synonyms of this MagicWord to an array, to allow quick
* lookup in a list of magic words
+ *
+ * @param $array
+ * @param $value
*/
function addToArray( &$array, $value ) {
global $wgContLang;
}
}
+ /**
+ * @return bool
+ */
function isCaseSensitive() {
return $this->mCaseSensitive;
}
+ /**
+ * Get the ID this object was constructed or loaded with
+ *
+ * @return string|int Magic word ID, e.g. 'redirect' (the constructor default is 0)
+ */
function getId() {
return $this->mId;
}
/**
* Add a magic word by name.
* Also resets the cached hash, base regex and regex to null; getRegex()
* checks for a null regex before using the cached value.
+ *
+ * @param $name string Name to append to $this->names
*/
public function add( $name ) {
- global $wgContLang;
$this->names[] = $name;
$this->hash = $this->baseRegex = $this->regex = null;
}
/**
* Add a number of magic words by name
+ *
+ * @param $names array
*/
public function addArray( $names ) {
$this->names = array_merge( $this->names, array_values( $names ) );
}
/**
- * Get an unanchored regex
+ * Get an unanchored regex that does not match parameters
*/
function getRegex() {
if ( is_null( $this->regex ) ) {
}
/**
- * Get a regex for matching variables
+ * Get a regex for matching variables with parameters.
+ * Takes the result of getRegex() and replaces every escaped "\$1"
+ * placeholder in it with the non-greedy capture group "(.*?)".
+ *
+ * @return string|array Same type as getRegex() returns, since str_replace()
+ *   preserves the type of its subject. NOTE(review): the sibling
+ *   getRegexStart() returns a two-element array; if getRegex() does as
+ *   well, this is an array rather than a string -- confirm.
*/
function getVariableRegex() {
return str_replace( "\\$1", "(.*?)", $this->getRegex() );
}
/**
- * Get an anchored regex for matching variables
+ * Get a regex anchored to the start of the string that does not match parameters
+ *
+ * @return array Two regex strings built from getBaseRegex():
+ *   [0] uses the "iuS" modifiers (case-insensitive, UTF-8),
+ *   [1] uses only "S" (case-sensitive).
+ *   An element is '' when the corresponding base regex is ''.
+ */
+ function getRegexStart() {
+ $base = $this->getBaseRegex();
+ $newRegex = array( '', '' );
+ if ( $base[0] !== '' ) {
+ $newRegex[0] = "/^(?:{$base[0]})/iuS";
+ }
+ if ( $base[1] !== '' ) {
+ $newRegex[1] = "/^(?:{$base[1]})/S";
+ }
+ return $newRegex;
+ }
+
+ /**
+ * Get an anchored regex for matching variables with parameters
+ *
+ * @return array
*/
function getVariableStartToEndRegex() {
$base = $this->getBaseRegex();
* Parse a match array from preg_match
* Returns array(magic word ID, parameter value)
* If there is no parameter value, that element will be false.
+ *
+ * @param $m array
+ *
+ * @return array
*/
function parseMatch( $m ) {
reset( $m );
}
// This shouldn't happen either
throw new MWException( __METHOD__.': parameter not found' );
- return array( false, false );
}
/**
* Returns an array with the magic word name in the first element and the
* parameter in the second element.
* Both elements are false if there was no match.
+ *
+ * @param $text string
+ *
+ * @return array
*/
public function matchVariableStartToEnd( $text ) {
- global $wgContLang;
$regexes = $this->getVariableStartToEndRegex();
foreach ( $regexes as $regex ) {
if ( $regex !== '' ) {
/**
* Match some text, without parameter capture
* Returns the magic word name, or false if there was no capture
+ *
+ * @param $text string
+ *
+ * @return string|bool False on failure
*/
public function matchStartToEnd( $text ) {
$hash = $this->getHash();
/**
* Returns an associative array, ID => param value, for all items that match
* Removes the matched items from the input string (passed by reference)
+ *
+ * @param $text string
+ *
+ * @return array
*/
public function matchAndRemove( &$text ) {
$found = array();
}
return $found;
}
+
+ /**
+ * Return the ID of the magic word at the start of $text, and remove
+ * the prefix from $text.
+ * Return false if no match found and $text is not modified.
+ * Does not match parameters.
+ *
+ * The regexes from getRegexStart() are tried in order and the first one
+ * that matches wins.
+ *
+ * @param $text string Input text, passed by reference; the matched prefix is stripped
+ *
+ * @return string|bool Magic word ID as returned by parseMatch(), or false
+ *   on failure (IDs appear to be the string names given to add() -- confirm)
+ */
+ public function matchStartAndRemove( &$text ) {
+ $regexes = $this->getRegexStart();
+ foreach ( $regexes as $regex ) {
+ if ( $regex === '' ) { // getRegexStart() leaves '' for an empty base regex
+ continue;
+ }
+ if ( preg_match( $regex, $text, $m ) ) {
+ list( $id, ) = $this->parseMatch( $m );
+ if ( strlen( $m[0] ) >= strlen( $text ) ) { // the match consumed all of $text
+ $text = '';
+ } else {
+ $text = substr( $text, strlen( $m[0] ) );
+ }
+ return $id;
+ }
+ }
+ return false;
+ }
}