From: Reedy Date: Sun, 14 Apr 2019 00:07:50 +0000 (+0100) Subject: Split SVGReader to its own file X-Git-Tag: 1.34.0-rc.0~2004 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=4be4fdc9cf1bf395e748bfaa7327ad305d511d2b Split SVGReader to its own file Change-Id: I9fc442225a37c14d0606508aed5ef496a5ad82ba --- diff --git a/.phpcs.xml b/.phpcs.xml index 6b625e5267..5f9d078f64 100644 --- a/.phpcs.xml +++ b/.phpcs.xml @@ -214,7 +214,6 @@ */includes/libs/filebackend/SwiftFileBackend\.php */includes/logging/LogEntry\.php */includes/logging/LogFormatter\.php - */includes/media/SVGMetadataExtractor\.php */includes/parser/Preprocessor_DOM\.php */includes/parser/Preprocessor_Hash\.php */includes/parser/Preprocessor\.php diff --git a/autoload.php b/autoload.php index 38ad652eb0..1bd32a74e6 100644 --- a/autoload.php +++ b/autoload.php @@ -1291,7 +1291,7 @@ $wgAutoloadLocalClasses = [ 'RunJobs' => __DIR__ . '/maintenance/runJobs.php', 'RunnableJob' => __DIR__ . '/includes/jobqueue/RunnableJob.php', 'SVGMetadataExtractor' => __DIR__ . '/includes/media/SVGMetadataExtractor.php', - 'SVGReader' => __DIR__ . '/includes/media/SVGMetadataExtractor.php', + 'SVGReader' => __DIR__ . '/includes/media/SVGReader.php', 'SamplingStatsdClient' => __DIR__ . '/includes/libs/stats/SamplingStatsdClient.php', 'Sanitizer' => __DIR__ . '/includes/parser/Sanitizer.php', 'ScopedLock' => __DIR__ . 
'/includes/libs/lockmanager/ScopedLock.php', diff --git a/includes/media/SVGMetadataExtractor.php b/includes/media/SVGMetadataExtractor.php index bc5eb09a4f..ac332b75db 100644 --- a/includes/media/SVGMetadataExtractor.php +++ b/includes/media/SVGMetadataExtractor.php @@ -35,361 +35,3 @@ class SVGMetadataExtractor { return $svg->getMetadata(); } } - -/** - * @ingroup Media - */ -class SVGReader { - const DEFAULT_WIDTH = 512; - const DEFAULT_HEIGHT = 512; - const NS_SVG = 'http://www.w3.org/2000/svg'; - const LANG_PREFIX_MATCH = 1; - const LANG_FULL_MATCH = 2; - - /** @var null|XMLReader */ - private $reader = null; - - /** @var bool */ - private $mDebug = false; - - /** @var array */ - private $metadata = []; - private $languages = []; - private $languagePrefixes = []; - - /** - * Creates an SVGReader drawing from the source provided - * @param string $source URI from which to read - * @throws MWException|Exception - */ - function __construct( $source ) { - global $wgSVGMetadataCutoff; - $this->reader = new XMLReader(); - - // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus. - $size = filesize( $source ); - if ( $size === false ) { - throw new MWException( "Error getting filesize of SVG." ); - } - - if ( $size > $wgSVGMetadataCutoff ) { - $this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." ); - $contents = file_get_contents( $source, false, null, 0, $wgSVGMetadataCutoff ); - if ( $contents === false ) { - throw new MWException( 'Error reading SVG file.' ); - } - $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING ); - } else { - $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING ); - } - - // Expand entities, since Adobe Illustrator uses them for xmlns - // attributes (T33719). Note that libxml2 has some protection - // against large recursive entity expansions so this is not as - // insecure as it might appear to be. 
However, it is still extremely - // insecure. It's necessary to wrap any read() calls with - // libxml_disable_entity_loader() to avoid arbitrary local file - // inclusion, or even arbitrary code execution if the expect - // extension is installed (T48859). - $oldDisable = libxml_disable_entity_loader( true ); - $this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true ); - - $this->metadata['width'] = self::DEFAULT_WIDTH; - $this->metadata['height'] = self::DEFAULT_HEIGHT; - - // The size in the units specified by the SVG file - // (for the metadata box) - // Per the SVG spec, if unspecified, default to '100%' - $this->metadata['originalWidth'] = '100%'; - $this->metadata['originalHeight'] = '100%'; - - // Because we cut off the end of the svg making an invalid one. Complicated - // try catch thing to make sure warnings get restored. Seems like there should - // be a better way. - Wikimedia\suppressWarnings(); - try { - $this->read(); - } catch ( Exception $e ) { - // Note, if this happens, the width/height will be taken to be 0x0. - // Should we consider it the default 512x512 instead? - Wikimedia\restoreWarnings(); - libxml_disable_entity_loader( $oldDisable ); - throw $e; - } - Wikimedia\restoreWarnings(); - libxml_disable_entity_loader( $oldDisable ); - } - - /** - * @return array Array with the known metadata - */ - public function getMetadata() { - return $this->metadata; - } - - /** - * Read the SVG - * @throws MWException - * @return bool - */ - protected function read() { - $keepReading = $this->reader->read(); - - /* Skip until first element */ - while ( $keepReading && $this->reader->nodeType != XMLReader::ELEMENT ) { - $keepReading = $this->reader->read(); - } - - if ( $this->reader->localName != 'svg' || $this->reader->namespaceURI != self::NS_SVG ) { - throw new MWException( "Expected <svg> tag, got " . - $this->reader->localName . " in NS " . $this->reader->namespaceURI ); - } - $this->debug( "<svg> tag is correct." 
); - $this->handleSVGAttribs(); - - $exitDepth = $this->reader->depth; - $keepReading = $this->reader->read(); - while ( $keepReading ) { - $tag = $this->reader->localName; - $type = $this->reader->nodeType; - $isSVG = ( $this->reader->namespaceURI == self::NS_SVG ); - - $this->debug( "$tag" ); - - if ( $isSVG && $tag == 'svg' && $type == XMLReader::END_ELEMENT - && $this->reader->depth <= $exitDepth - ) { - break; - } elseif ( $isSVG && $tag == 'title' ) { - $this->readField( $tag, 'title' ); - } elseif ( $isSVG && $tag == 'desc' ) { - $this->readField( $tag, 'description' ); - } elseif ( $isSVG && $tag == 'metadata' && $type == XMLReader::ELEMENT ) { - $this->readXml( 'metadata' ); - } elseif ( $isSVG && $tag == 'script' ) { - // We normally do not allow scripted svgs. - // However its possible to configure MW to let them - // in, and such files should be considered animated. - $this->metadata['animated'] = true; - } elseif ( $tag !== '#text' ) { - $this->debug( "Unhandled top-level XML tag $tag" ); - - // Recurse into children of current tag, looking for animation and languages. - $this->animateFilterAndLang( $tag ); - } - - // Goto next element, which is sibling of current (Skip children). 
- $keepReading = $this->reader->next(); - } - - $this->reader->close(); - - $this->metadata['translations'] = $this->languages + $this->languagePrefixes; - - return true; - } - - /** - * Read a textelement from an element - * - * @param string $name Name of the element that we are reading from - * @param string $metafield Field that we will fill with the result - */ - private function readField( $name, $metafield = null ) { - $this->debug( "Read field $metafield" ); - if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) { - return; - } - $keepReading = $this->reader->read(); - while ( $keepReading ) { - if ( $this->reader->localName == $name - && $this->reader->namespaceURI == self::NS_SVG - && $this->reader->nodeType == XMLReader::END_ELEMENT - ) { - break; - } elseif ( $this->reader->nodeType == XMLReader::TEXT ) { - $this->metadata[$metafield] = trim( $this->reader->value ); - } - $keepReading = $this->reader->read(); - } - } - - /** - * Read an XML snippet from an element - * - * @param string $metafield Field that we will fill with the result - * @throws MWException - */ - private function readXml( $metafield = null ) { - $this->debug( "Read top level metadata" ); - if ( !$metafield || $this->reader->nodeType != XMLReader::ELEMENT ) { - return; - } - // @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf" - $this->metadata[$metafield] = trim( $this->reader->readInnerXml() ); - - $this->reader->next(); - } - - /** - * Filter all children, looking for animated elements. - * Also get a list of languages that can be targeted. 
- * - * @param string $name Name of the element that we are reading from - */ - private function animateFilterAndLang( $name ) { - $this->debug( "animate filter for tag $name" ); - if ( $this->reader->nodeType != XMLReader::ELEMENT ) { - return; - } - if ( $this->reader->isEmptyElement ) { - return; - } - $exitDepth = $this->reader->depth; - $keepReading = $this->reader->read(); - while ( $keepReading ) { - if ( $this->reader->localName == $name && $this->reader->depth <= $exitDepth - && $this->reader->nodeType == XMLReader::END_ELEMENT - ) { - break; - } elseif ( $this->reader->namespaceURI == self::NS_SVG - && $this->reader->nodeType == XMLReader::ELEMENT - ) { - $sysLang = $this->reader->getAttribute( 'systemLanguage' ); - if ( !is_null( $sysLang ) && $sysLang !== '' ) { - // See https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute - $langList = explode( ',', $sysLang ); - foreach ( $langList as $langItem ) { - $langItem = trim( $langItem ); - if ( Language::isWellFormedLanguageTag( $langItem ) ) { - $this->languages[$langItem] = self::LANG_FULL_MATCH; - } - // Note, the standard says that any prefix should work, - // here we do only the initial prefix, since that will catch - // 99% of cases, and we are going to compare against fallbacks. - // This differs mildly from how the spec says languages should be - // handled, however it matches better how the MediaWiki language - // preference is generally handled. - $dash = strpos( $langItem, '-' ); - // Intentionally checking both !false and > 0 at the same time. - if ( $dash ) { - $itemPrefix = substr( $langItem, 0, $dash ); - if ( Language::isWellFormedLanguageTag( $itemPrefix ) ) { - $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH; - } - } - } - } - switch ( $this->reader->localName ) { - case 'script': - // Normally we disallow files with - //