(bug 19195) Make user IDs more readily available with the API
[lhc/web/wiklou.git] / includes / media / PNGMetadataExtractor.php
1 <?php
2 /**
3 * PNG frame counter and metadata extractor.
4 * Slightly derived from GIFMetadataExtractor.php
5 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
6 * redistribution.
7 *
8 * @file
9 * @ingroup Media
10 */
11
12 /**
13 * PNG frame counter.
14 *
15 * @ingroup Media
16 */
class PNGMetadataExtractor {
	/** The 8-byte signature every PNG file starts with; (re)initialised by getMetadata(). */
	static $png_sig;

	/** Number of CRC bytes that trail every chunk; (re)initialised by getMetadata(). */
	static $CRC_size;

	/**
	 * Map of lower-cased PNG text keywords to the field names used in the
	 * 'text' part of the result; (re)initialised by getMetadata().
	 */
	static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Walk the chunks of a PNG file and collect the animation info (acTL/fcTL),
	 * the header info (IHDR), the textual metadata (iTXt/tEXt/zTXt), the last
	 * modification time (tIME) and the pixel density (pHYs).
	 *
	 * Chunks of any other type are skipped. Parsing stops at IEND.
	 *
	 * @param $filename String Path of the PNG file to inspect.
	 * @throws Exception if the file is missing, cannot be opened, is not a PNG,
	 *   or contains a truncated or malformed chunk. The file handle is closed
	 *   before the exception propagates.
	 * @return Array with the keys 'frameCount', 'loopCount', 'duration',
	 *   'text', 'bitDepth' and 'colorType'.
	 */
	static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that touches the open handle lives inside this try block so
		// that a parse error cannot leak the handle (catch + rethrow rather than
		// "finally", to stay compatible with old PHP versions).
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf != self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length, 4-byte type,
			// <length> bytes of data, then self::$CRC_size bytes of CRC.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				// unpack( "N" ) can come back negative where PHP integers are 32 bits wide.
				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Bytes 0-7 are width and height; byte 8 is the bit depth,
					// byte 9 the colour type.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Go through self::read() like every other chunk so that the
					// MAX_CHUNK_SIZE guard applies here too.
					$buf = self::read( $fh, $chunk_size );
					// Two 32-bit fields are unpacked below, so anything shorter
					// than 8 bytes is a malformed chunk.
					if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// The frame delay fraction starts 20 bytes into the chunk data.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					// A zero denominator is treated as 100 (hundredths of a second).
					if ( $fctldur['delay_den'] == 0 ) {
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4] = the single byte
						 * matched right after the lang code, $items[5] = content.
						 *
						 * NOTE(review): the byte between the compressed flag and the
						 * lang code is the literal \x00 in the pattern, so $items[4]
						 * looks like the separator after the lang code rather than a
						 * compression type - confirm against the iTXt layout before
						 * relying on the $items[4] check below.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}

							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. ' or potentially invalid compression method' );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[ $items[1] ];
						$text[ $finalKeyword ][ $items[3] ] = $items[5];
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}

				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[ $keyword ] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					// The content is converted from ISO-8859-1 to UTF-8.
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[ $keyword ];
					$text[ $finalKeyword ][ 'x-default' ] = $content;
					$text[ $finalKeyword ]['_type'] = 'lang';

				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[ $keyword ] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword is the compression method,
						// the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[ $keyword ];
						$text[ $finalKeyword ][ 'x-default' ] = $content;
						$text[ $finalKeyword ]['_type'] = 'lang';

					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}

				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}

				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not care about: jump over its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Jump over the CRC that follows the chunk data.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later writes to $value cannot alter $text.
			unset( $value );
		}
		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);

	}

	/**
	 * Read a chunk, checking to make sure its not too big.
	 *
	 * A zero (or negative) size yields an empty string without touching the
	 * stream: fread() rejects such a length, and on newer PHP versions does so
	 * with a ValueError that callers catching Exception would not see. Callers
	 * treat the empty string as a read error where data is required.
	 *
	 * @param $fh resource The file handle
	 * @param $size Integer size in bytes.
	 * @throws Exception if too big.
	 * @return String The chunk.
	 */
	static private function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}
		if ( $size <= 0 ) {
			return '';
		}
		return fread( $fh, $size );
	}
}