Add X-Content-Dimensions support to DjVu
authorGilles Dubuc <gilles@wikimedia.org>
Thu, 11 May 2017 11:12:55 +0000 (13:12 +0200)
committerGilles Dubuc <gdubuc@wikimedia.org>
Thu, 11 May 2017 11:54:27 +0000 (13:54 +0200)
Bug: T150741
Change-Id: I4a3bae9bf056a7ba332f2f0a330697cdf59b4d04

includes/media/DjVu.php

index a852215..374e166 100644 (file)
@@ -304,11 +304,28 @@ class DjVuHandler extends ImageHandler {
                        return false;
                }
 
+               $trees = $this->extractTreesFromMetadata( $metadata );
+               $image->djvuTextTree = $trees['TextTree'];
+               $image->dejaMetaTree = $trees['MetaTree'];
+
+               if ( $gettext ) {
+                       return $image->djvuTextTree;
+               } else {
+                       return $image->dejaMetaTree;
+               }
+       }
+
+       /**
+        * Extracts metadata and text trees from metadata XML in string form
+        * @param string $metadata XML metadata as a string
+        * @return array
+        */
+       protected function extractTreesFromMetadata( $metadata ) {
                MediaWiki\suppressWarnings();
                try {
                        // Set to false rather than null to avoid further attempts
-                       $image->dejaMetaTree = false;
-                       $image->djvuTextTree = false;
+                       $metaTree = false;
+                       $textTree = false;
                        $tree = new SimpleXMLElement( $metadata, LIBXML_PARSEHUGE );
                        if ( $tree->getName() == 'mw-djvu' ) {
                                /** @var SimpleXMLElement $b */
@@ -316,23 +333,20 @@ class DjVuHandler extends ImageHandler {
                                        if ( $b->getName() == 'DjVuTxt' ) {
                                                // @todo File::djvuTextTree and File::dejaMetaTree are declared
                                                // dynamically. Add a public File::$data to facilitate this?
-                                               $image->djvuTextTree = $b;
+                                               $textTree = $b;
                                        } elseif ( $b->getName() == 'DjVuXML' ) {
-                                               $image->dejaMetaTree = $b;
+                                               $metaTree = $b;
                                        }
                                }
                        } else {
-                               $image->dejaMetaTree = $tree;
+                               $metaTree = $tree;
                        }
                } catch ( Exception $e ) {
-                       wfDebug( "Bogus multipage XML metadata on '{$image->getName()}'\n" );
+                       wfDebug( "Bogus multipage XML metadata\n" );
                }
                MediaWiki\restoreWarnings();
-               if ( $gettext ) {
-                       return $image->djvuTextTree;
-               } else {
-                       return $image->dejaMetaTree;
-               }
+
+               return [ 'MetaTree' => $metaTree, 'TextTree' => $textTree ];
        }
 
        function getImageSize( $image, $path ) {
@@ -394,30 +408,39 @@ class DjVuHandler extends ImageHandler {
                        $cache::TTL_INDEFINITE,
                        function () use ( $file ) {
                                $tree = $this->getMetaTree( $file );
-                               if ( !$tree ) {
-                                       return false;
-                               }
-
-                               $dimsByPage = [];
-                               $count = count( $tree->xpath( '//OBJECT' ) );
-                               for ( $i = 0; $i < $count; $i++ ) {
-                                       $o = $tree->BODY[0]->OBJECT[$i];
-                                       if ( $o ) {
-                                               $dimsByPage[$i] = [
-                                                       'width' => (int)$o['width'],
-                                                       'height' => (int)$o['height'],
-                                               ];
-                                       } else {
-                                               $dimsByPage[$i] = false;
-                                       }
-                               }
-
-                               return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
+                               return $this->getDimensionInfoFromMetaTree( $tree );
                        },
                        [ 'pcTTL' => $cache::TTL_INDEFINITE ]
                );
        }
 
+       /**
+        * Given an XML metadata tree, returns dimension information about the document
+        * @param bool|SimpleXMLElement $metatree The file's XML metadata tree
+        * @return bool|array
+        */
+       protected function getDimensionInfoFromMetaTree( $metatree ) {
+               if ( !$metatree ) {
+                       return false;
+               }
+
+               $dimsByPage = [];
+               $count = count( $metatree->xpath( '//OBJECT' ) );
+               for ( $i = 0; $i < $count; $i++ ) {
+                       $o = $metatree->BODY[0]->OBJECT[$i];
+                       if ( $o ) {
+                               $dimsByPage[$i] = [
+                                       'width' => (int)$o['width'],
+                                       'height' => (int)$o['height'],
+                               ];
+                       } else {
+                               $dimsByPage[$i] = false;
+                       }
+               }
+
+               return [ 'pageCount' => $count, 'dimensionsByPage' => $dimsByPage ];
+       }
+
        /**
         * @param File $image
         * @param int $page Page number to get information for
@@ -438,4 +461,40 @@ class DjVuHandler extends ImageHandler {
                        return false;
                }
        }
+
+       /**
+       * Get useful response headers for GET/HEAD requests for a file with the given metadata
+       * @param $metadata Array Contains this handler's unserialized getMetadata() for a file
+       * @return array
+       */
+       public function getContentHeaders( $metadata ) {
+               if ( !is_array( $metadata ) || !isset( $metadata['xml'] ) ) {
+                       return [];
+               }
+
+               $trees = $this->extractTreesFromMetadata( $metadata['xml'] );
+               $dimensionInfo = $this->getDimensionInfoFromMetaTree( $trees['MetaTree'] );
+
+               if ( !$dimensionInfo ) {
+                       return [];
+               }
+
+               $pagesByDimensions = [];
+               $count = $dimensionInfo['pageCount'];
+
+               for ( $i = 1; $i <= $count; $i++ ) {
+                       $dimensions = $dimensionInfo['dimensionsByPage'][ $i - 1 ];
+                       $dimensionString = $dimensions['width'] . 'x' . $dimensions['height'];
+
+                       if ( isset ( $pagesByDimensions[ $dimensionString ] ) ) {
+                               $pagesByDimensions[ $dimensionString ][] = $i;
+                       } else {
+                               $pagesByDimensions[ $dimensionString ] = [ $i ];
+                       }
+               }
+
+               $pageRangesByDimensions = MediaHandler::getPageRangesByDimensions( $pagesByDimensions );
+
+               return [ 'X-Content-Dimensions' => $pageRangesByDimensions ];
+       }
 }