}
/**
- * Parses the given text and constructs a TitleValue. Normalization
- * is applied according to the rules appropriate for the form specified by $form.
+ * Parses the given text and constructs a TitleValue.
*
* @param string $text The text to parse
* @param int $defaultNamespace Namespace to assume per default (usually NS_MAIN)
* @return TitleValue
*/
public function parseTitle( $text, $defaultNamespace = NS_MAIN ) {
+ // Convert character references like &eacute; &#257; or &#x3017; into normalized (T16952) text
+ $filteredText = Sanitizer::decodeCharReferencesAndNormalize( $text );
+
// NOTE: this is an ugly kludge that allows this class to share the
// code for parsing with the old Title class. The parser code should
// be refactored to avoid this.
- $parts = $this->splitTitleString( $text, $defaultNamespace );
+ $parts = $this->splitTitleString( $filteredText, $defaultNamespace );
- // Relative fragment links are not supported by TitleValue
- if ( $parts['dbkey'] === '' ) {
+ // Fragment-only is okay, but only with no namespace
+ if ( $parts['dbkey'] === '' &&
+ ( $parts['fragment'] === '' || $parts['namespace'] !== NS_MAIN ) ) {
throw new MalformedTitleException( 'title-invalid-empty', $text );
}
# Strip Unicode bidi override characters.
# Sometimes they slip into cut-n-pasted page titles, where the
# override chars get included in list displays.
- $dbkey = preg_replace( '/\xE2\x80[\x8E\x8F\xAA-\xAE]/S', '', $dbkey );
+ $dbkey = preg_replace( '/[\x{200E}\x{200F}\x{202A}-\x{202E}]+/u', '', $dbkey );
# Clean up whitespace
# Note: use of the /u option on preg_replace here will cause