9ee5a4a5d84e37775cb6c789bb9df03bb8bfdb49
[lhc/web/wiklou.git] / includes / media / PNGMetadataExtractor.php
1 <?php
2 /**
3 * PNG frame counter and metadata extractor.
4 * Slightly derived from GIFMetadataExtractor.php
5 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
6 * redistribution.
7 *
8 * @file
9 * @ingroup Media
10 */
11
12 /**
13 * PNG frame counter.
14 *
15 * @ingroup Media
16 */
class PNGMetadataExtractor {
	/** The 8-byte signature that opens every PNG file; assigned by getMetadata(). */
	public static $png_sig;

	/** Number of CRC bytes skipped after each chunk's data; assigned by getMetadata(). */
	public static $CRC_size;

	/**
	 * Map of lower-cased PNG text keywords to the field names used in the
	 * 'text' part of the result; assigned by getMetadata().
	 */
	public static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Walk the chunks of a PNG file and collect frame count, animation
	 * timing, colour information and whitelisted textual metadata.
	 *
	 * The file handle is closed on every exit path, including when a
	 * malformed chunk causes an exception.
	 *
	 * @param $filename String Path of the file to inspect.
	 * @throws Exception if no name is given, the file does not exist or
	 *  cannot be opened, the PNG signature is missing, or a chunk is
	 *  truncated, oversized or malformed.
	 * @return Array with the keys 'frameCount', 'loopCount', 'duration',
	 *  'text', 'bitDepth' and 'colorType'.
	 */
	public static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		// Binary mode: the content is raw bytes, never text.
		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that can throw while the handle is open lives inside
		// this try block so that the handle is released before the
		// exception reaches the caller.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian data length,
			// 4-byte type, the data itself, then a CRC which is skipped.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( $buf === false || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				// Where integers are 32 bits wide a length of 2^31 or
				// more comes back negative; seeking by it would move
				// backwards through the file.
				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( $chunk_type === false || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					// Bit depth is byte 8 and colour type is byte 9.
					if ( strlen( $buf ) < 10 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Goes through read() like every other chunk so that
					// the MAX_CHUNK_SIZE limit applies here too.
					$buf = self::read( $fh, $chunk_size );
					// Two 32-bit fields are unpacked below.
					if ( strlen( $buf ) < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					// The two 16-bit delay fields sit at offsets 20-23.
					if ( strlen( $buf ) < 24 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$buf = substr( $buf, 20 );

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					// A zero denominator is treated as 100.
					if ( $fctldur['delay_den'] == 0 ) {
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[5] = content.
						 *
						 * NOTE(review): the pattern pins the byte right after the
						 * compressed flag to \x00, so $items[4] appears to capture
						 * the byte that follows the language tag (normally the NUL
						 * separator) rather than the compression method - confirm
						 * against the iTXt layout in the PNG spec before relying
						 * on the $items[4] test below.
						 */

						// Theoretically should be case-sensitive, but in practice...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. ' or potentially invalid compression method' );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[ $items[1] ];
						$text[ $finalKeyword ][ $items[3] ] = $items[5];
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practice...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					// The content is converted from ISO-8859-1 to UTF-8.
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[ $keyword ];
					$text[ $finalKeyword ][ 'x-default' ] = $content;
					$text[ $finalKeyword ]['_type'] = 'lang';

				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practice...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword's NUL is the compression
						// method; the remainder is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[ $keyword ];
						$text[ $finalKeyword ][ 'x-default' ] = $content;
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					// read() throws unless all 7 bytes are available.
					$buf = self::read( $fh, $chunk_size );

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					// read() throws unless all 9 bytes are available.
					$buf = self::read( $fh, $chunk_size );

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						// Stored as a "value/100" fraction, i.e. per centimetre.
						$text['XResolution'] = $dim['width']
							. '/100';
						$text['YResolution'] = $dim['height']
							. '/100';
						$text['ResolutionUnit'] = 3;
						// 3 = dots per cm (from Exif).
					}

				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not interpret: step over its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Step over the CRC that trails the chunk data.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			// Release the handle before handing the failure to the caller.
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		// The summed frame delays cover one pass; scale by the play count
		// when the animation is played more than once.
		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// fixme: currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Drop the reference so the last element is not left aliased.
			unset( $value );
		}
		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);

	}

	/**
	 * Read a chunk's data, checking that it is not too big and that all of
	 * it is actually present in the file.
	 *
	 * @param $fh resource The file handle
	 * @param $size Integer size in bytes.
	 * @throws Exception if $size exceeds MAX_CHUNK_SIZE or fewer than
	 *  $size bytes could be read.
	 * @return String The chunk data; an empty string when $size is 0.
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}

		if ( $size === 0 ) {
			// fread() does not accept a zero length, so report the empty
			// chunk directly and let the caller decide if that is valid.
			return '';
		}

		$data = fread( $fh, $size );
		if ( $data === false || strlen( $data ) < $size ) {
			// The file ends before the chunk does.
			throw new Exception( __METHOD__ . ': Read error' );
		}

		return $data;
	}
}