Tweak for r29561: don't grab a database object until we need it
[lhc/web/wiklou.git] / includes / DjVuImage.php
index 871c563..b48aaff 100644 (file)
@@ -1,11 +1,6 @@
 <?php
+
 /**
- * Support for detecting/validating DjVu image files and getting
- * some basic file metadata (resolution etc)
- *
- * File format docs are available in source package for DjVuLibre:
- * http://djvulibre.djvuzone.org/
- *
  *
  * Copyright (C) 2006 Brion Vibber <brion@pobox.com>
  * http://www.mediawiki.org/
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  * http://www.gnu.org/copyleft/gpl.html
  *
- * @package MediaWiki
  */
 
+/**
+ * Support for detecting/validating DjVu image files and getting
+ * some basic file metadata (resolution etc)
+ *
+ * File format docs are available in source package for DjVuLibre:
+ * http://djvulibre.djvuzone.org/
+ *
+ * @addtogroup Media
+ */
 class DjVuImage {
        function __construct( $filename ) {
                $this->mFilename = $filename;
@@ -68,6 +71,7 @@ class DjVuImage {
        function dump() {
                $file = fopen( $this->mFilename, 'rb' );
                $header = fread( $file, 12 );
+               // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
                extract( unpack( 'a4magic/a4chunk/NchunkLength', $header ) );
                echo "$chunk $chunkLength\n";
                $this->dumpForm( $file, $chunkLength, 1 );
@@ -83,6 +87,7 @@ class DjVuImage {
                        if( $chunkHeader == '' ) {
                                break;
                        }
+                       // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
                        extract( unpack( 'a4chunk/NchunkLength', $chunkHeader ) );
                        echo str_repeat( ' ', $indent * 4 ) . "$chunk $chunkLength\n";
                        
@@ -99,7 +104,9 @@ class DjVuImage {
        }
        
        function getInfo() {
+               wfSuppressWarnings();
                $file = fopen( $this->mFilename, 'rb' );
+               wfRestoreWarnings();
                if( $file === false ) {
                        wfDebug( __METHOD__ . ": missing or failed file read\n" );
                        return false;
@@ -111,6 +118,7 @@ class DjVuImage {
                if( strlen( $header ) < 16 ) {
                        wfDebug( __METHOD__ . ": too short file header\n" );
                } else {
+                       // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
                        extract( unpack( 'a4magic/a4form/NformLength/a4subtype', $header ) );
                        
                        if( $magic != 'AT&T' ) {
@@ -134,6 +142,7 @@ class DjVuImage {
                if( strlen( $header ) < 8 ) {
                        return array( false, 0 );
                } else {
+                       // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
                        extract( unpack( 'a4chunk/Nlength', $header ) );
                        return array( $chunk, $length );
                }
@@ -192,6 +201,7 @@ class DjVuImage {
                        return false;
                }
                
+               // FIXME: Would be good to replace this extract() call with something that explicitly initializes local variables.
                extract( unpack(
                        'nwidth/' .
                        'nheight/' .
@@ -214,16 +224,121 @@ class DjVuImage {
         * @return string
         */
        function retrieveMetaData() {
-               global $wgDjvuToXML;
-               if ( isset( $wgDjvuToXML ) ) {
-                       $cmd = $wgDjvuToXML . ' --without-anno --without-text ' . $this->mFilename;
-                       $xml = wfShellExec( $cmd, $retval );
+               global $wgDjvuToXML, $wgDjvuDump;
+               if ( isset( $wgDjvuDump ) ) {
+                       # Prefer djvudump whenever it is configured: it is faster as of version 3.5, see
+                       # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583
+                       wfProfileIn( 'djvudump' );
+                       $cmd = wfEscapeShellArg( $wgDjvuDump ) . ' ' . wfEscapeShellArg( $this->mFilename );
+                       $dump = wfShellExec( $cmd );
+                       $xml = $this->convertDumpToXML( $dump ); # false if the dump is empty or cannot be parsed
+                       wfProfileOut( 'djvudump' );
+               } elseif ( isset( $wgDjvuToXML ) ) {
+                       wfProfileIn( 'djvutoxml' );
+                       $cmd = wfEscapeShellArg( $wgDjvuToXML ) . ' --without-anno --without-text ' .
+                               wfEscapeShellArg( $this->mFilename );
+                       $xml = wfShellExec( $cmd );
+                       wfProfileOut( 'djvutoxml' );
                } else {
                        $xml = null;
                }
                return $xml;
        }
-               
+
+       /**
+        * Hack to temporarily work around a djvutoxml bug.
+        *
+        * Builds a minimal DjVuXML document from the text passed in $dump
+        * (retrieveMetaData() passes the output of the $wgDjvuDump command).
+        * The first line must be a FORM:DJVU (single-page) or FORM:DJVM
+        * (multi-page) entry; every page found contributes one OBJECT element
+        * through parseFormDjvu().
+        *
+        * The dump is walked with strtok(), whose position is shared with
+        * parseFormDjvu(), so the order of the strtok() calls matters.
+        *
+        * @param string $dump Text to convert
+        * @return string|false The XML string, or false if $dump is empty, no
+        *   page could be parsed, or the document is an indirect multi-page file
+        */
+       function convertDumpToXML( $dump ) {
+               if ( strval( $dump ) == '' ) {
+                       return false;
+               }
+
+               $xml = <<<EOT
+<?xml version="1.0" ?>
+<!DOCTYPE DjVuXML PUBLIC "-//W3C//DTD DjVuXML 1.1//EN" "pubtext/DjVuXML-s.dtd">
+<DjVuXML>
+<HEAD></HEAD>
+<BODY>
+EOT;
+
+               $dump = str_replace( "\r", '', $dump );
+               $line = strtok( $dump, "\n" );
+               $m = false;
+               $good = false;
+               if ( preg_match( '/^( *)FORM:DJVU/', $line, $m ) ) {
+                       # Single-page document: the FORM:DJVU line itself starts the only page
+                       if ( $this->parseFormDjvu( $line, $xml ) ) {
+                               $good = true;
+                       } else {
+                               return false;
+                       }
+               } elseif ( preg_match( '/^( *)FORM:DJVM/', $line, $m ) ) {
+                       # Multi-page document: pages are FORM:DJVU lines nested below the FORM:DJVM line
+                       $parentLevel = strlen( $m[1] );
+                       # Find DIRM (to reject indirect documents) and every FORM:DJVU page among the children
+                       $line = strtok( "\n" );
+                       while ( $line !== false ) {
+                               $childLevel = strspn( $line, ' ' );
+                               if ( $childLevel <= $parentLevel ) {
+                                       # End of chunk: this line is not indented deeper than the FORM:DJVM line
+                                       break;
+                               }
+
+                               if ( preg_match( '/^ *DIRM.*indirect/', $line ) ) {
+                                       wfDebug( "Indirect multi-page DjVu document, bad for server!\n" );
+                                       return false;
+                               }
+                               if ( preg_match( '/^ *FORM:DJVU/', $line ) ) {
+                                       # Found page; parseFormDjvu() advances the shared strtok() position
+                                       if ( $this->parseFormDjvu( $line, $xml ) ) {
+                                               $good = true;
+                                       } else {
+                                               return false;
+                                       }
+                               }
+                               $line = strtok( "\n" );
+                       }
+               }
+               if ( !$good ) {
+                       return false;
+               }
+
+               $xml .= "</BODY>\n</DjVuXML>\n";
+               return $xml;
+       }
+       /**
+        * Read the lines nested below a FORM:DJVU line and append an OBJECT
+        * element describing that page to $xml.
+        *
+        * Continues the strtok() tokenization started in convertDumpToXML(),
+        * so it is only meaningful while that scan is in progress.
+        *
+        * @param string $line The FORM:DJVU line; its leading spaces give the
+        *   nesting level of the page
+        * @param string $xml XML built so far; the new element is appended by
+        *   reference
+        * @return bool True once an INFO line was matched and the element was
+        *   appended, false if the chunk or the dump ended without a match
+        */
+       function parseFormDjvu( $line, &$xml ) {
+               $parentLevel = strspn( $line, ' ' );
+               $line = strtok( "\n" );
+
+               # Find INFO among the more deeply indented lines that follow
+               while ( $line !== false ) {
+                       $childLevel = strspn( $line, ' ' );
+                       if ( $childLevel <= $parentLevel ) {
+                               # End of chunk: this line is not indented deeper than the FORM:DJVU line
+                               break;
+                       }
+
+                       if ( preg_match( '/^ *INFO *\[\d*\] *DjVu *(\d+)x(\d+), *\w*, *(\d+) *dpi, *gamma=([0-9.-]+)/', $line, $m ) ) {
+                               $xml .= Xml::tags( 'OBJECT', 
+                                       array(
+                                               #'data' => '',
+                                               #'type' => 'image/x.djvu',
+                                               'height' => $m[2],
+                                               'width' => $m[1],
+                                               #'usemap' => '',
+                                       ), 
+                                       "\n" .
+                                       Xml::element( 'PARAM', array( 'name' => 'DPI', 'value' => $m[3] ) ) . "\n" .
+                                       Xml::element( 'PARAM', array( 'name' => 'GAMMA', 'value' => $m[4] ) ) . "\n"
+                               ) . "\n";
+                               return true;
+                       }
+                       $line = strtok( "\n" );
+               }
+               # Not found: the chunk or the dump ended without a matching INFO line
+               return false;
+       }
 }