tests/parser/ParserTestResultNormalizer.php

   1 <?php
   2 /**
   3  * @file
   4  * @ingroup Testing
   5  */
   6
   7 class ParserTestResultNormalizer {
   8         protected $doc, $xpath, $invalid;
   9
  10         public static function normalize( $text, $funcs ) {
  11                 $norm = new self( $text );
  12                 if ( $norm->invalid ) {
  13                         return $text;
  14                 }
  15                 foreach ( $funcs as $func ) {
  16                         $norm->$func();
  17                 }
  18                 return $norm->serialize();
  19         }
  20
  21         protected function __construct( $text ) {
  22                 $this->doc = new DOMDocument( '1.0', 'utf-8' );
  23
  24                 // Note: parsing a supposedly XHTML document with an XML parser is not
  25                 // guaranteed to give accurate results. For example, it may introduce
  26                 // differences in the number of line breaks in <pre> tags.
  27
  28                 MediaWiki\suppressWarnings();
  29                 if ( !$this->doc->loadXML( '<html><body>' . $text . '</body></html>' ) ) {
  30                         $this->invalid = true;
  31                 }
  32                 MediaWiki\restoreWarnings();
  33                 $this->xpath = new DOMXPath( $this->doc );
  34                 $this->body = $this->xpath->query( '//body' )->item( 0 );
  35         }
  36
  37         protected function removeTbody() {
  38                 foreach ( $this->xpath->query( '//tbody' ) as $tbody ) {
  39                         while ( $tbody->firstChild ) {
  40                                 $child = $tbody->firstChild;
  41                                 $tbody->removeChild( $child );
  42                                 $tbody->parentNode->insertBefore( $child, $tbody );
  43                         }
  44                         $tbody->parentNode->removeChild( $tbody );
  45                 }
  46         }
  47
  48         /**
  49          * The point of this function is to produce a normalized DOM in which
  50          * Tidy's output matches the output of html5depurate. Tidy both trims
  51          * and pretty-prints, so this requires fairly aggressive treatment.
  52          *
  53          * In particular, note that Tidy converts <pre>x</pre> to <pre>\nx\n</pre>,
  54          * which theoretically affects display since the second line break is not
  55          * ignored by compliant HTML parsers.
  56          *
  57          * This function also removes empty elements, as does Tidy.
  58          */
  59         protected function trimWhitespace() {
  60                 foreach ( $this->xpath->query( '//text()' ) as $child ) {
  61                         if ( strtolower( $child->parentNode->nodeName ) === 'pre' ) {
  62                                 // Just trim one line break from the start and end
  63                                 if ( substr_compare( $child->data, "\n", 0 ) === 0 ) {
  64                                         $child->data = substr( $child->data, 1 );
  65                                 }
  66                                 if ( substr_compare( $child->data, "\n", -1 ) === 0 ) {
  67                                         $child->data = substr( $child->data, 0, -1 );
  68                                 }
  69                         } else {
  70                                 // Trim all whitespace
  71                                 $child->data = trim( $child->data );
  72                         }
  73                         if ( $child->data === '' ) {
  74                                 $child->parentNode->removeChild( $child );
  75                         }
  76                 }
  77         }
  78
  79         /**
  80          * Serialize the XML DOM for comparison purposes. This does not generate HTML.
  81          */
  82         protected function serialize() {
  83                 return strtr( $this->doc->saveXML( $this->body ),
  84                         [ '<body>' => '', '</body>' => '' ] );
  85         }
  86 }
  87