Merge "Get timestamp from WikiPage, instead of Article"
[lhc/web/wiklou.git] / includes / media / PNGMetadataExtractor.php
1 <?php
2 /**
3 * PNG frame counter and metadata extractor.
4 *
5 * Slightly derived from GIFMetadataExtractor.php
6 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
7 * redistribution.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
23 *
24 * @file
25 * @ingroup Media
26 */
27
28 /**
29 * PNG frame counter and textual metadata extractor.
30 *
31 * @ingroup Media
32 */
class PNGMetadataExtractor {
	/** @var string The 8-byte PNG file signature; (re)initialised by getMetadata() */
	private static $pngSig;

	/** @var int Size in bytes of the CRC that trails every chunk; (re)initialised by getMetadata() */
	private static $crcSize;

	/** @var array Map of lower-cased PNG text keywords to the metadata field names used in the result */
	private static $textChunks;

	const VERSION = 1;
	const MAX_CHUNK_SIZE = 3145728; // 3 megabytes

	/**
	 * Walk the chunks of a PNG/APNG file and collect frame and textual metadata.
	 *
	 * The file is read chunk by chunk until IEND. IHDR supplies the bit depth
	 * and colour type, acTL/fcTL supply the animation frame count, loop count
	 * and total duration, iTXt/tEXt/zTXt supply keyword text (only keywords
	 * listed in self::$textChunks are kept), tIME supplies 'DateTime' and
	 * pHYs supplies the resolution fields. All other chunks are skipped.
	 *
	 * @param string $filename Path of the PNG file to inspect
	 * @throws Exception If the file is missing or unreadable, is not a PNG,
	 *  is truncated, or contains a malformed or oversized chunk
	 * @return array Array with the keys 'frameCount', 'loopCount', 'duration',
	 *  'text', 'bitDepth' and 'colorType'
	 */
	public static function getMetadata( $filename ) {
		self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$crcSize = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and http://www.w3.org/TR/PNG/#11keywords
		 */
		self::$textChunks = [
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		];

		$frameCount = 0;
		$loopCount = 1;
		$text = [];
		$duration = 0.0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new Exception( __METHOD__ . ": No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that touches the open handle lives inside try/finally so
		// the handle is released even when a malformed file makes us throw.
		try {
			// Check for the PNG header
			$buf = fread( $fh, 8 );
			if ( $buf !== self::$pngSig ) {
				throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length, 4-byte type,
			// <length> bytes of data, then a CRC of self::$crcSize bytes.
			while ( !feof( $fh ) ) {
				$buf = fread( $fh, 4 );
				if ( !$buf || strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$chunk_size = unpack( "N", $buf );
				$chunk_size = $chunk_size[1];

				if ( $chunk_size < 0 ) {
					throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
				}

				$chunk_type = fread( $fh, 4 );
				if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				if ( $chunk_type == "IHDR" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Bytes 0-7 are width and height; bit depth and colour type follow.
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// http://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type == "acTL" ) {
					// Go through self::read() so the MAX_CHUNK_SIZE guard applies
					// here exactly as it does for every other chunk we load.
					$buf = self::read( $fh, $chunk_size );
					// The unpack() below consumes two 32-bit integers, so anything
					// shorter than 8 bytes cannot be a usable acTL chunk.
					if ( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 8 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type == "fcTL" ) {
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}
					// Skip the first 20 bytes; the two 16-bit delay fields follow.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					// A zero denominator is treated as 100 (hundredths of a second).
					if ( $fctldur['delay_den'] == 0 ) {
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type == "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$buf = self::read( $fh, $chunk_size );
					$items = [];
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk keyword, $items[2] = compression flag,
						 * $items[3] = language tag (or ""), $items[5] = content.
						 * The compression-method byte is matched by the literal \x00
						 * that follows the flag in the pattern; $items[4] captures the
						 * single byte that comes right after the language tag.
						 */

						// Theoretically should be case-sensitive, but in practise...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$textChunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] == "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								MediaWiki\suppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								MediaWiki\restoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
									fseek( $fh, self::$crcSize, SEEK_CUR );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method\n" );
								fseek( $fh, self::$crcSize, SEEK_CUR );
								continue;
							}
						}
						$finalKeyword = self::$textChunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
					}
				} elseif ( $chunk_type == 'tEXt' ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					list( $keyword, $content ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $content === '' ) {
						throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
					}

					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}
					// The content is converted from ISO-8859-1 to UTF-8 before storing.
					MediaWiki\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					MediaWiki\restoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type == 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						$buf = self::read( $fh, $chunk_size );

						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}

						list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
						}
						// Theoretically should be case-sensitive, but in practise...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$textChunks[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}
						// First byte after the keyword separator is the compression
						// method; the compressed payload follows it.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						MediaWiki\suppressWarnings();
						$content = gzuncompress( $content );
						MediaWiki\restoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}

						MediaWiki\suppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						MediaWiki\restoreWarnings();

						if ( $content === false ) {
							throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
						}

						$finalKeyword = self::$textChunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
						fseek( $fh, $chunk_size, SEEK_CUR );
					}
				} elseif ( $chunk_type == 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						throw new Exception( __METHOD__ . ": tIME wrong size" );
					}
					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					// Only stored when wfTimestamp() returned a truthy value.
					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type == 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						throw new Exception( __METHOD__ . ": pHYs wrong size" );
					}

					$buf = self::read( $fh, $chunk_size );
					if ( !$buf || strlen( $buf ) < $chunk_size ) {
						throw new Exception( __METHOD__ . ": Read error" );
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] == 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type == "IEND" ) {
					break;
				} else {
					// Chunk type we do not care about: skip its data.
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
				// Skip the CRC that follows the chunk data.
				fseek( $fh, self::$crcSize, SEEK_CUR );
			}
		} finally {
			fclose( $fh );
		}

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Break the by-reference binding left behind by the foreach above.
			unset( $value );
		}

		return [
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		];
	}

	/**
	 * Read a chunk, checking to make sure its not too big.
	 *
	 * The size is compared against self::MAX_CHUNK_SIZE before any data is
	 * read. The result of fread() is returned unchanged, so callers still
	 * have to check for a short or failed read themselves.
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws Exception If too big
	 * @return string The chunk.
	 */
	private static function read( $fh, $size ) {
		if ( $size > self::MAX_CHUNK_SIZE ) {
			throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
				' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
		}

		return fread( $fh, $size );
	}
}