Merge "Define 'MW_UPDATER' when running update.php"
[lhc/web/wiklou.git] / includes / media / PNGMetadataExtractor.php
1 <?php
2 /**
3 * PNG frame counter and metadata extractor.
4 *
5 * Slightly derived from GIFMetadataExtractor.php
6 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
7 * redistribution.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
23 *
24 * @file
25 * @ingroup Media
26 */
27
28 /**
29 * PNG frame counter.
30 *
31 * @ingroup Media
32 */
class PNGMetadataExtractor {
	/** @var string The eight byte PNG file signature; (re)initialised by getMetadata() */
	public static $png_sig;

	/** @var int Number of CRC bytes trailing every chunk; (re)initialised by getMetadata() */
	public static $CRC_size;

	/** @var array Lower-cased textual chunk keyword => metadata property name; (re)initialised by getMetadata() */
	public static $text_chunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Walk over the chunks of a PNG file and collect animation information
	 * (acTL/fcTL), header information (IHDR), textual metadata (iTXt, tEXt,
	 * zTXt), the last modification time (tIME) and the pixel density (pHYs).
	 * All other chunks are skipped. Reading stops at the IEND chunk.
	 *
	 * @param $filename String Path of the PNG file to inspect.
	 * @throws Exception if the file is missing, cannot be opened, does not start
	 *  with the PNG signature, or contains a truncated or malformed chunk.
	 * @return Array with the keys 'frameCount', 'loopCount', 'duration',
	 *  'text', 'bitDepth' and 'colorType'.
	 */
	public static function getMetadata( $filename ) {
		self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$CRC_size = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$text_chunks = array(
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		);

		$frameCount = 0;
		$loopCount = 1;
		$text = array();
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything below may throw on a malformed file; the catch block at
		// the end makes sure the handle is released before the exception
		// propagates to the caller.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$png_sig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4 byte length, 4 byte type, data, CRC.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type === "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Bytes 0-7 are width and height; bit depth and colour type follow.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type === "acTL" ) {
					// Go through self::read() so the MAX_CHUNK_SIZE limit applies
					// here as well. The unpack() format below consumes two 32-bit
					// integers, so anything shorter than 8 bytes is malformed.
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type === "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Skip the first 20 bytes; the delay numerator and
					// denominator (16 bit each) are read from what follows.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 100.
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type === "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = array();
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[4]= compression type.
						 * $items[5] = content
						 *
						 * NOTE(review): the pattern itself requires a literal \x00
						 * directly after the compressed flag, and group 4 matches the
						 * single byte that follows the language code - confirm
						 * whether $items[4] really carries the compression type.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$text_chunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] === '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] === "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								wfSuppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								wfRestoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
									fseek( $fh, self::$CRC_size, SEEK_CUR );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method\n" );
								fseek( $fh, self::$CRC_size, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$text_chunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}
				} elseif ( $chunk_type === 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$text_chunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$CRC_size, SEEK_CUR );
						continue;
					}
					wfSuppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					wfRestoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$text_chunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type === 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$text_chunks[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}
						// First byte after the keyword separator is the compression
						// method; the rest is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = gzuncompress( $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
							fseek( $fh, self::$CRC_size, SEEK_CUR );
							continue;
						}

						wfSuppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						wfRestoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$text_chunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type === 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type === 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type === "IEND" ) {
					break;
				} else {
					// Chunk we are not interested in: jump over its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Jump over the CRC that trails the chunk data.
				fseek( $fh, self::$CRC_size, SEEK_CUR );
			}
		} catch ( Exception $e ) {
			// Release the handle before reporting the failure to the caller.
			fclose( $fh );
			throw $e;
		}
		fclose( $fh );

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the reference so later use of $value cannot
			// accidentally modify the last array element.
			unset( $value );
		}

		return array(
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		);
	}

	/**
	 * Read a chunk, checking to make sure its not too big.
	 *
	 * @param $fh resource The file handle
	 * @param $size Integer size in bytes.
	 * @throws Exception if too big.
	 * @return String The chunk; an empty string when $size is not positive.
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}

		if ( $size <= 0 ) {
			// fread() does not accept a non-positive length. An empty chunk
			// has no data, and every caller treats '' as a read error.
			return '';
		}

		return fread( $fh, $size );
	}
}