+ /**
+  * React to a <!DOCTYPE declaration encountered while reading.
+  *
+  * Depending on which parser options are set, this will:
+  *  - pass the raw DTD text to the 'dtd_handler' callback,
+  *  - pass the DTD type, public id and system id to the
+  *    'external_dtd_handler' callback (only when the parsed DTD has a type),
+  *  - mark the document as not well-formed when 'require_safe_dtd' is set
+  *    and the internal subset fails checkDTDIsSafe().
+  *
+  * A truthy return value from either callback counts as a filter match and
+  * is stored as the match type; a later match overwrites an earlier one.
+  * If none of the three options is set, nothing is done.
+  *
+  * @param XMLReader $reader Reader currently pointing at DOCTYPE node.
+  */
+ private function DTDHandler( XMLReader $reader ) {
+ 	$externalCallback = $this->parserOptions['external_dtd_handler'];
+ 	$generalCallback = $this->parserOptions['dtd_handler'];
+ 	$checkIfSafe = $this->parserOptions['require_safe_dtd'];
+ 	if ( !( $externalCallback || $generalCallback || $checkIfSafe ) ) {
+ 		// Nobody is interested in the DTD.
+ 		return;
+ 	}
+
+ 	$rawDtd = $reader->readOuterXML();
+
+ 	// Handler that sees the complete, unparsed declaration.
+ 	if ( $generalCallback ) {
+ 		$generalVerdict = call_user_func( $generalCallback, $rawDtd );
+ 		if ( $generalVerdict ) {
+ 			$this->filterMatch = true;
+ 			$this->filterMatchType = $generalVerdict;
+ 		}
+ 	}
+
+ 	// Handler that only sees the external identifier parts.
+ 	$dtdParts = $this->parseDTD( $rawDtd );
+ 	if ( $externalCallback && isset( $dtdParts['type'] ) ) {
+ 		$publicId = isset( $dtdParts['publicid'] ) ? $dtdParts['publicid'] : null;
+ 		$systemId = isset( $dtdParts['systemid'] ) ? $dtdParts['systemid'] : null;
+ 		$externalVerdict = call_user_func(
+ 			$externalCallback,
+ 			$dtdParts['type'],
+ 			$publicId,
+ 			$systemId
+ 		);
+ 		if ( $externalVerdict ) {
+ 			$this->filterMatch = true;
+ 			$this->filterMatchType = $externalVerdict;
+ 		}
+ 	}
+
+ 	// Optional whitelist check of the internal subset.
+ 	if (
+ 		$checkIfSafe
+ 		&& isset( $dtdParts['internal'] )
+ 		&& !$this->checkDTDIsSafe( $dtdParts['internal'] )
+ 	) {
+ 		$this->wellFormed = false;
+ 	}
+ }
+
+ /**
+  * Check if the internal subset of the DTD is safe.
+  *
+  * We whitelist an extremely restricted subset of DTD features.
+  *
+  * Safe is defined as:
+  *  * Only contains entity definitions (e.g. no <!ATTLIST ), apart from the
+  *    exceptions listed below
+  *  * Entity definitions are not "system" entities
+  *  * Entity definitions are not "parameter" (i.e. %) entities
+  *  * Entity definitions do not reference other entities except &amp;
+  *    and quotes (&quot; in a double-quoted value, &apos; in a
+  *    single-quoted value). Entity aliases (where the entity contains
+  *    only a single reference to another entity) are allowed
+  *  * Entity values aren't overly long (at most 255 characters or
+  *    whitelisted references; alias names at most 64 bytes)
+  *  * <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">
+  *    allowed if matched exactly for compatibility with graphviz
+  *  * Comments.
+  *
+  * An empty (or whitespace-only) subset is considered safe.
+  *
+  * @param string $internalSubset The internal subset of the DTD
+  * @return bool true if safe.
+  */
+ private function checkDTDIsSafe( $internalSubset ) {
+ 	// The only references allowed inside a value are spelled out literally
+ 	// (&amp; plus the quote matching the delimiter). A bare "&" alternative
+ 	// would let arbitrary &name; references through, and a bare quote would
+ 	// terminate the value (or, for the apostrophe, this PHP string).
+ 	$res = preg_match(
+ 		'/^(?:\s*<!ENTITY\s+\S+\s+' .
+ 			'(?:"(?:&[^"%&;]{1,64};|(?:[^"%&]|&amp;|&quot;){0,255})"' .
+ 			'|\'(?:&[^"%&;]{1,64};|(?:[^\'%&]|&amp;|&apos;){0,255})\')\s*>' .
+ 			'|\s*<!--(?:[^-]|-[^-])*-->' .
+ 			'|\s*<!ATTLIST svg xmlns:xlink CDATA #FIXED ' .
+ 			'"http:\/\/www\.w3\.org\/1999\/xlink">)*\s*$/',
+ 		$internalSubset
+ 	);
+
+ 	// preg_match() yields 0 for no match and false on a PCRE error;
+ 	// both are treated as "not safe".
+ 	return $res === 1;
+ }
+
+ /**
+  * Parse DTD into parts.
+  *
+  * Recognises an optional external identifier (either
+  * PUBLIC "public id" "system id" or SYSTEM "system id", with each literal
+  * quoted by either double or single quotes) followed by an optional
+  * internal subset in square brackets.
+  *
+  * If there is an error parsing the dtd, sets wellFormed to false and
+  * returns an empty array.
+  *
+  * @param string $dtd The complete <!DOCTYPE ...> declaration
+  * @return array Possibly containing keys publicid, systemid, type and internal.
+  *  Parts that are absent or empty are omitted.
+  */
+ private function parseDTD( $dtd ) {
+ 	$m = [];
+ 	$res = preg_match(
+ 		'/^<!DOCTYPE\s*\S+\s*' .
+ 		'(?:(?P<typepublic>PUBLIC)\s*' .
+ 			'(?:"(?P<pubquote>[^"]*)"|\'(?P<pubapos>[^\']*)\')' . // public identifier
+ 			// The system identifier alternatives must be grouped, otherwise
+ 			// the "|" splits the whole external-id group and a single-quoted
+ 			// system id after PUBLIC is never matched.
+ 			'\s*(?:"(?P<pubsysquote>[^"]*)"|\'(?P<pubsysapos>[^\']*)\')' . // system identifier
+ 		'|(?P<typesystem>SYSTEM)\s*' .
+ 			'(?:"(?P<sysquote>[^"]*)"|\'(?P<sysapos>[^\']*)\')' .
+ 		')?\s*' .
+ 		'(?:\[\s*(?P<internal>.*)\])?\s*>$/s',
+ 		$dtd,
+ 		$m
+ 	);
+ 	if ( !$res ) {
+ 		$this->wellFormed = false;
+ 		return [];
+ 	}
+
+ 	// Several capture groups feed the same output key, depending on which
+ 	// keyword and which quote style was used.
+ 	$fieldToKey = [
+ 		'typepublic' => 'type',
+ 		'typesystem' => 'type',
+ 		'pubquote' => 'publicid',
+ 		'pubapos' => 'publicid',
+ 		'pubsysquote' => 'systemid',
+ 		'pubsysapos' => 'systemid',
+ 		'sysquote' => 'systemid',
+ 		'sysapos' => 'systemid',
+ 		'internal' => 'internal',
+ 	];
+
+ 	$parsed = [];
+ 	foreach ( $m as $field => $value ) {
+ 		// Skip groups that did not participate (empty string) as well as
+ 		// the numeric duplicates preg_match() adds for every named group.
+ 		if ( $value === '' || !isset( $fieldToKey[$field] ) ) {
+ 			continue;
+ 		}
+ 		$parsed[$fieldToKey[$field]] = $value;
+ 	}
+ 	return $parsed;
+ }