Merge to trunk everything in img_metadata branch.
[lhc/web/wiklou.git] / includes / media / PNGMetadataExtractor.php
1 <?php
2 /**
3 * PNG frame counter and metadata extractor.
4 * Slightly derived from GIFMetadataExtractor.php
5 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
6 * redistribution.
7 *
8 * @file
9 * @ingroup Media
10 */
11
/**
 * PNG frame counter and metadata extractor.
 *
 * Walks the chunk stream of a PNG/APNG file and collects the animation
 * parameters (acTL/fcTL), the image header information (IHDR), textual
 * metadata (iTXt/tEXt/zTXt), the last-modification time (tIME) and the
 * physical pixel dimensions (pHYs).
 *
 * @ingroup Media
 */
class PNGMetadataExtractor {
	// These statics are (re)initialised at the start of every getMetadata() call.
	static $png_sig;
	static $CRC_size;
	static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Extract frame, colour and textual metadata from a PNG file.
	 *
	 * @param $filename String Path of the PNG file to inspect.
	 * @throws Exception if the file is missing, cannot be opened, is not a
	 *  PNG, or contains a chunk that cannot be read.
	 * @return Array with the keys 'frameCount', 'loopCount', 'duration',
	 *  'text', 'bitDepth' and 'colorType'.
	 */
	static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		// Must start at zero: files without any fcTL chunk never add to it.
		$duration = 0.0;
		$text = array();
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		// Binary mode so that no platform performs newline translation.
		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that can throw while the handle is open lives inside
		// this try block so that the handle is always closed again.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4 byte length, 4 byte type,
			// <length> bytes of data, 4 byte CRC.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					// Bit depth and colour type live at offsets 8 and 9.
					if ( !$buf || strlen( $buf ) < 10 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Go through self::read() so the size limit applies here too.
					$buf = self::read( $fh, $chunk_size );
					// Two 32 bit integers are unpacked below.
					if ( !$buf || strlen( $buf ) < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					// The two 16 bit delay fields start at offset 20.
					if ( !$buf || strlen( $buf ) < 24 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					$buf = substr( $buf, 20 );

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is replaced by 100.
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4]= compression type.
						 * $items[5] = content
						 *
						 * NOTE(review): in the pattern the byte directly after the
						 * compression flag is matched as a literal \x00 and group 4
						 * is the single byte following the language code, so
						 * $items[4] looks like a separator rather than the
						 * compression type - confirm against the PNG spec.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. ' or potentially invalid compression method' );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[ $items[1] ];
						$text[ $finalKeyword ][ $items[3] ] = $items[5];
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// Without the separator explode() returns a single element
					// and the list() assignment below would be incomplete.
					if ( !is_string( $buf ) || strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[ $keyword ];
					$text[ $finalKeyword ][ 'x-default' ] = $content;
					$text[ $finalKeyword ]['_type'] = 'lang';

				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// Same guard as for tEXt: the separator must exist.
						if ( !is_string( $buf ) || strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword is the compression
						// method, the remainder is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[ $keyword ];
						$text[ $finalKeyword ][ 'x-default' ] = $content;
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < 7 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < 9 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						$text['XResolution'] = $dim['width']
							. '/100';
						$text['YResolution'] = $dim['height']
							. '/100';
						$text['ResolutionUnit'] = 3;
						// 3 = dots per cm (from Exif).
					}

				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not care about: skip its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that trails every chunk.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			// Do not leak the file handle when parsing fails.
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// fixme: currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later writes to $value cannot
			// modify the last array element by accident.
			unset( $value );
		}

		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);
	}

	/**
	 * Read a chunk, checking to make sure its not too big.
	 *
	 * @param $fh resource The file handle
	 * @param $size Integer size in bytes.
	 * @throws Exception if too big.
	 * @return String The chunk (empty string for a zero-length chunk), or
	 *  whatever fread() returns on failure.
	 */
	static private function read( $fh, $size ) {
		// A negative size is treated like an oversized chunk; presumably it
		// can only come from a 32 bit length that overflowed a signed int.
		if ( $size < 0 || $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size === 0 ) {
			// fread() rejects a zero length, so short-circuit here.
			return '';
		}
		return fread( $fh, $size );
	}
}