Make sure that if we fail to read the App13 (iptc) block of a JPG file, that that...
authorBrian Wolff <bawolff@users.mediawiki.org>
Thu, 5 Jan 2012 23:25:39 +0000 (23:25 +0000)
committerBrian Wolff <bawolff@users.mediawiki.org>
Thu, 5 Jan 2012 23:25:39 +0000 (23:25 +0000)
RELEASE-NOTES-1.19
includes/media/BitmapMetadataHandler.php
includes/media/JpegMetadataExtractor.php
tests/phpunit/data/media/iptc-invalid-psir.jpg [new file with mode: 0644]
tests/phpunit/includes/media/BitmapMetadataHandlerTest.php
tests/phpunit/includes/media/JpegMetadataExtractorTest.php

index 2cb77f7..e073ce5 100644 (file)
@@ -223,6 +223,8 @@ production.
 * (bug 33321) Adding a line to MediaWiki:Sidebar that contains a pipe, but doesn't
   have any pipes after being transformed by MessageCache, causes exception on
   all pages.
+* Files with IPTC blocks we can't read no longer prevent extraction of exif
+  or other metadata.
 
 === API changes in 1.19 ===
 * (bug 19838) siprop=interwikimap can now use the interwiki cache.
index d1caa67..8aab9bd 100644 (file)
@@ -32,7 +32,15 @@ class BitmapMetadataHandler {
        * @param String $app13 String containing app13 block from jpeg file
        */
        private function doApp13 ( $app13 ) {
-               $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
+               try {
+                       $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 );
+               } catch ( MWException $e ) {
+                       // Error reading the iptc hash information.
+                       // This probably means the App13 segment is something other than what we expect.
+                       // However, still try to read it, and treat it as if the hash didn't exist.
+                       wfDebug( "Error parsing iptc data of file: " . $e->getMessage() . "\n" );
+                       $this->iptcType = 'iptc-no-hash';
+               }
 
                $iptc = IPTC::parse( $app13 );
                $this->addMetadata( $iptc, $this->iptcType );
@@ -122,8 +130,10 @@ class BitmapMetadataHandler {
                if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) {
                        $meta->addMetadata( Array( 'JPEGFileComment' => $seg['COM'] ), 'native' );
                }
-               if ( isset( $seg['PSIR'] ) ) {
-                       $meta->doApp13( $seg['PSIR'] );
+               if ( isset( $seg['PSIR'] ) && count( $seg['PSIR'] ) > 0 ) {
+                       foreach( $seg['PSIR'] as $curPSIRValue ) {
+                               $meta->doApp13( $curPSIRValue );
+                       }
                }
                if ( isset( $seg['XMP'] ) && $showXMP ) {
                        $xmp = new XMPReader();
index 4769bf8..224b4a2 100644 (file)
@@ -31,6 +31,7 @@ class JpegMetadataExtractor {
                $segments = array(
                        'XMP_ext' => array(),
                        'COM' => array(),
+                       'PSIR' => array(),
                );
 
                if ( !$filename ) {
@@ -122,7 +123,7 @@ class JpegMetadataExtractor {
                                // APP13 - PSIR. IPTC and some photoshop stuff
                                $temp = self::jpegExtractMarker( $fh );
                                if ( substr( $temp, 0, 14 ) === "Photoshop 3.0\x00" ) {
-                                       $segments["PSIR"] = $temp;
+                                       $segments["PSIR"][] = $temp;
                                }
                        } elseif ( $buffer === "\xD9" || $buffer === "\xDA" ) {
                                // EOI - end of image or SOS - start of scan. either way we're past any interesting segments
@@ -162,11 +163,12 @@ class JpegMetadataExtractor {
        * This should generally be called by BitmapMetadataHandler::doApp13()
        *
        * @param String $app13 photoshop psir app13 block from jpg.
+       * @throws MWException (It gets caught next level up though)
        * @return String if the iptc hash is good or not.
        */
        public static function doPSIR ( $app13 ) {
                if ( !$app13 ) {
-                       return;
+                       throw new MWException( "No App13 segment given" );
                }
                // First compare hash with real thing
                // 0x404 contains IPTC, 0x425 has hash
@@ -218,8 +220,8 @@ class JpegMetadataExtractor {
 
                        // this should not happen, but check.
                        if ( $lenData['len'] + $offset > $appLen ) {
-                               wfDebug( __METHOD__ . " PSIR data too long.\n" );
-                               return 'iptc-no-hash';
+                               throw new MWException( "PSIR data too long. (item length=" . $lenData['len']
+                                       . "; offset=$offset; total length=$appLen)" );
                        }
 
                        if ( $valid ) {
diff --git a/tests/phpunit/data/media/iptc-invalid-psir.jpg b/tests/phpunit/data/media/iptc-invalid-psir.jpg
new file mode 100644 (file)
index 0000000..01b9acf
Binary files /dev/null and b/tests/phpunit/data/media/iptc-invalid-psir.jpg differ
index bb0f0bb..f4f52dd 100644 (file)
@@ -56,6 +56,16 @@ class BitmapMetadataHandlerTest extends MediaWikiTestCase {
                        $meta['JPEGFileComment'][0] );
        }
 
+       /**
+        * Make sure a bad iptc block doesn't stop the other metadata
+        * from being extracted.
+        */
+       public function testBadIPTC() {
+               $meta = BitmapMetadataHandler::Jpeg( $this->filePath .
+                       'iptc-invalid-psir.jpg' );
+               $this->assertEquals( 'Created with GIMP', $meta['JPEGFileComment'][0] );
+       }
+
        public function testIPTCDates() {
                $meta = BitmapMetadataHandler::Jpeg( $this->filePath .
                        'iptc-timetest.jpg' );
index 5bf9bfe..f48382a 100644 (file)
@@ -59,7 +59,7 @@ class JpegMetadataExtractorTest extends MediaWikiTestCase {
        public function testPSIRExtraction() {
                $res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' );
                $expected = '50686f746f73686f7020332e30003842494d04040000000000181c02190004746573741c02190003666f6f1c020000020004';
-               $this->assertEquals( $expected, bin2hex( $res['PSIR'] ) );
+               $this->assertEquals( $expected, bin2hex( $res['PSIR'][0] ) );
        }
        public function testXMPExtractionAltAppId() {
                $res = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-alt.jpg' );
@@ -70,19 +70,19 @@ class JpegMetadataExtractorTest extends MediaWikiTestCase {
 
        public function testIPTCHashComparisionNoHash() {
                $segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-xmp-psir.jpg' );
-               $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'] );
+               $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] );
 
                $this->assertEquals( 'iptc-no-hash', $res );
        }
        public function testIPTCHashComparisionBadHash() {
                $segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-iptc-bad-hash.jpg' );
-               $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'] );
+               $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] );
 
                $this->assertEquals( 'iptc-bad-hash', $res );
        }
        public function testIPTCHashComparisionGoodHash() {
                $segments = JpegMetadataExtractor::segmentSplitter( $this->filePath . 'jpeg-iptc-good-hash.jpg' );
-               $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'] );
+               $res = JpegMetadataExtractor::doPSIR( $segments['PSIR'][0] );
 
                $this->assertEquals( 'iptc-good-hash', $res );
        }