Merge "Add support for PHP7 random_bytes in favor of mcrypt_create_iv"
[lhc/web/wiklou.git] / includes / media / FormatMetadata.php
1 <?php
2 /**
3 * Formatting of image metadata values into human readable form.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @ingroup Media
21 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
22 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff
23 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
24 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
25 * @file
26 */
27 use Wikimedia\Timestamp\TimestampException;
28
29 /**
30 * Format Image metadata values into a human readable form.
31 *
32 * Note lots of these messages use the prefix 'exif' even though
33 * they may not be exif properties. For example 'exif-ImageDescription'
34 * can be the Exif ImageDescription, or it could be the iptc-iim caption
35 * property, or it could be the xmp dc:description property. This
36 * is because these messages should be independent of how the data is
37 * stored, since the user doesn't care if the description is stored in xmp,
38 * exif, etc., only that it's a description. (Additionally many of these properties
39 * are merged together following the MWG standard, such that for example,
40 * exif properties override XMP properties that mean the same thing if
41 * there is a conflict).
42 *
43 * It should perhaps use a prefix like 'metadata' instead, but there
44 * is already a large number of messages using the 'exif' prefix.
45 *
46 * @ingroup Media
47 * @since 1.23 the class extends ContextSource and various formerly-public
48 * internal methods are private
49 */
50 class FormatMetadata extends ContextSource {
51 /**
52 * Only output a single language for multi-language fields
53 * @var bool
54 * @since 1.23
55 */
56 protected $singleLang = false;
57
58 /**
59 * Trigger only outputting single language for multilanguage fields
60 *
61 * @param bool $val
62 * @since 1.23
63 */
public function setSingleLanguage( $val ) {
	// Store the caller's preference on the instance. flattenArrayReal()
	// checks this flag while rendering values of type 'lang' and, when it
	// is truthy, returns a single language instead of the whole list.
	$this->singleLang = $val;
}
67
68 /**
69 * Numbers given by Exif user agents are often magical, that is they
70 * should be replaced by a detailed explanation depending on their
71 * value which most of the time are plain integers. This function
72 * formats Exif (and other metadata) values into human readable form.
73 *
74 * This is the usual entry point for this class.
75 *
76 * @param array $tags The Exif data to format ( as returned by
77 * Exif::getFilteredData() or BitmapMetadataHandler )
78 * @param bool|IContextSource $context Context to use (optional)
79 * @return array
80 */
public static function getFormattedData( $tags, $context = false ) {
	// Static convenience wrapper: build a throw-away formatter, optionally
	// bind it to the supplied context, and delegate to makeFormattedData().
	$formatter = new self;

	if ( $context ) {
		// Only override the default context when the caller passed one.
		$formatter->setContext( $context );
	}

	$formatted = $formatter->makeFormattedData( $tags );

	return $formatted;
}
89
90 /**
91 * Numbers given by Exif user agents are often magical, that is they
92 * should be replaced by a detailed explanation depending on their
93 * value which most of the time are plain integers. This function
94 * formats Exif (and other metadata) values into human readable form.
95 *
96 * @param array $tags The Exif data to format ( as returned by
97 * Exif::getFilteredData() or BitmapMetadataHandler )
98 * @return array
99 * @since 1.23
100 */
public function makeFormattedData( $tags ) {
	// Formats every metadata tag in $tags into a human readable string (or,
	// for a few special cases, leaves it untouched) and returns the array.
	//
	// Per tag the steps are:
	//  1. wrap scalar values in an array so single and multi valued tags
	//     share one code path,
	//  2. pull out the optional '_type' marker (list type for flattening),
	//  3. format GPSTimeStamp / Contact as a whole, or every value on its
	//     own through formatSingleTagValue(),
	//  4. flatten the formatted values back into one string.

	// ResolutionUnit only qualifies X/YResolution; 2 (inches) is the default
	// when the tag is absent, anything else is treated as 3 (centimetres).
	$resolutionunit = ( !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ) ? 2 : 3;
	unset( $tags['ResolutionUnit'] );

	// Iterate by value and write results back through $tags[$tag]. The
	// previous by-reference loops left dangling references behind.
	foreach ( $tags as $tag => $vals ) {
		// Compare tag names as strings so that an integer array key can
		// never loosely match a tag name.
		$tagName = (string)$tag;

		if ( !is_array( $vals ) ) {
			$vals = [ $vals ];
		}

		// _type is a special value to say what array type
		if ( isset( $vals['_type'] ) ) {
			$type = $vals['_type'];
			unset( $vals['_type'] );
		} else {
			$type = 'ul'; // default unordered list.
		}

		// This is done differently as the tag is an hour/min/sec array.
		if ( $tagName === 'GPSTimeStamp' && count( $vals ) === 3 ) {
			$formattedTime = $this->formatGpsTimeStampValue( $vals );
			// On invalid input the (unwrapped) array is kept as it was.
			$tags[$tag] = $formattedTime !== null ? $formattedTime : $vals;
			continue;
		}

		// The contact info is a multi-valued field instead of the other
		// props which are single valued (mostly) so handle as a special case.
		if ( $tagName === 'Contact' ) {
			$tags[$tag] = $this->collapseContactInfo( $vals );
			continue;
		}

		foreach ( $vals as $key => $val ) {
			$vals[$key] = $this->formatSingleTagValue( $tagName, $val, $resolutionunit );
		}

		// End formatting values, start flattening arrays.
		$tags[$tag] = $this->flattenArrayReal( $vals, $type );
	}

	return $tags;
}

/**
 * Format a GPSTimeStamp value given as three "numerator/denominator" strings
 * (hour, minute, second).
 *
 * The data should already be validated when loaded from a file, but it could
 * come from a foreign repo, so every part is checked before it is divided.
 *
 * @param array $vals Array with keys 0, 1 and 2 holding the three fractions
 * @return string|null Formatted time, or null if the input is not usable
 */
private function formatGpsTimeStampValue( $vals ) {
	$parts = [];
	foreach ( [ 0, 1, 2 ] as $index ) {
		if ( !isset( $vals[$index] ) || !is_scalar( $vals[$index] ) ) {
			return null;
		}
		$fraction = explode( '/', $vals[$index] );
		if ( !isset( $fraction[1] )
			|| !is_numeric( $fraction[0] )
			|| !is_numeric( $fraction[1] )
			|| $fraction[1] == 0
		) {
			return null;
		}
		$parts[] = str_pad( intval( $fraction[0] / $fraction[1] ), 2, '0', STR_PAD_LEFT );
	}

	$formatted = implode( ':', $parts );

	try {
		// the 1971:01:01 is just a placeholder, and not shown to user.
		$time = wfTimestamp( TS_MW, '1971:01:01 ' . $formatted );
		if ( $time && intval( $time ) > 0 ) {
			$formatted = $this->getLanguage()->time( $time );
		}
	} catch ( TimestampException $e ) {
		// This shouldn't happen, but we've seen bad formats
		// such as 4-digit seconds in the wild.
		// leave $formatted as hh:mm:ss
	}

	return $formatted;
}

/**
 * Format one value of one metadata tag.
 *
 * @param string $tag Tag name, e.g. 'Orientation'
 * @param mixed $val Raw value of the tag
 * @param int $resolutionunit 2 (inches) or 3 (centimetres); only used for
 *  XResolution and YResolution
 * @return mixed The formatted value; unrecognised enumerated values are
 *  returned as they were passed in
 */
private function formatSingleTagValue( $tag, $val, $resolutionunit ) {
	// Tags whose value is one of a fixed set of codes. A recognised code
	// is replaced by the message "exif-<tag>-<code>"; anything else is
	// displayed as is.
	static $knownValues = [
		'Compression' => [ 1, 2, 3, 4, 5, 6, 7, 8, 32773, 32946, 34712 ],
		'PhotometricInterpretation' => [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 32803, 34892 ],
		'Orientation' => [ 1, 2, 3, 4, 5, 6, 7, 8 ],
		'PlanarConfiguration' => [ 1, 2 ],
		// TODO: YCbCrSubSampling
		'YCbCrPositioning' => [ 1, 2 ],
		'ColorSpace' => [ 1, 65535 ],
		'ComponentsConfiguration' => [ 0, 1, 2, 3, 4, 5, 6 ],
		'ExposureProgram' => [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ],
		'MeteringMode' => [ 0, 1, 2, 3, 4, 5, 6, 7, 255 ],
		'LightSource' => [
			0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15,
			17, 18, 19, 20, 21, 22, 23, 24, 255
		],
		'FocalPlaneResolutionUnit' => [ 2 ],
		'SensingMethod' => [ 1, 2, 3, 4, 5, 7, 8 ],
		'FileSource' => [ 3 ],
		'SceneType' => [ 1 ],
		'CustomRendered' => [ 0, 1 ],
		'ExposureMode' => [ 0, 1, 2 ],
		'WhiteBalance' => [ 0, 1 ],
		'SceneCaptureType' => [ 0, 1, 2, 3 ],
		'GainControl' => [ 0, 1, 2, 3, 4 ],
		'Contrast' => [ 0, 1, 2 ],
		'Saturation' => [ 0, 1, 2 ],
		'Sharpness' => [ 0, 1, 2 ],
		'SubjectDistanceRange' => [ 0, 1, 2, 3 ],
		// The GPS...Ref values are kept for compatibility, probably won't be reached.
		'GPSLatitudeRef' => [ 'N', 'S' ],
		'GPSDestLatitudeRef' => [ 'N', 'S' ],
		'GPSLongitudeRef' => [ 'E', 'W' ],
		'GPSDestLongitudeRef' => [ 'E', 'W' ],
		'GPSStatus' => [ 'A', 'V' ],
		'GPSMeasureMode' => [ 2, 3 ],
		'GPSTrackRef' => [ 'T', 'M' ],
		'GPSImgDirectionRef' => [ 'T', 'M' ],
		'GPSDestBearingRef' => [ 'T', 'M' ],
		'GPSSpeedRef' => [ 'K', 'M', 'N' ],
		'GPSDestDistanceRef' => [ 'K', 'M', 'N' ],
		'Copyrighted' => [ 'True', 'False' ],
	];

	// Enumerated tags whose messages live under a different tag name.
	static $messageTags = [
		'GPSLatitudeRef' => 'GPSLatitude',
		'GPSDestLatitudeRef' => 'GPSLatitude',
		'GPSLongitudeRef' => 'GPSLongitude',
		'GPSDestLongitudeRef' => 'GPSLongitude',
		'GPSTrackRef' => 'GPSDirection',
		'GPSImgDirectionRef' => 'GPSDirection',
		'GPSDestBearingRef' => 'GPSDirection',
		'GPSSpeedRef' => 'GPSSpeed',
		'GPSDestDistanceRef' => 'GPSDestDistance',
	];

	// Do not transform fields with pure text.
	// For some languages the formatNum() conversion results to wrong
	// output like foo,bar@example,com or foo٫bar@example٫com.
	// Also some 'numeric' things like Scene codes are included here as
	// we really don't want commas inserted.
	static $plainTextTags = [
		'ImageDescription', 'UserComment', 'Artist', 'Copyright',
		'RelatedSoundFile', 'ImageUniqueID', 'SpectralSensitivity',
		'GPSSatellites', 'GPSVersionID', 'GPSMapDatum', 'Keywords',
		'WorldRegionDest', 'CountryDest', 'CountryCodeDest',
		'ProvinceOrStateDest', 'CityDest', 'SublocationDest',
		'WorldRegionCreated', 'CountryCreated', 'CountryCodeCreated',
		'ProvinceOrStateCreated', 'CityCreated', 'SublocationCreated',
		'ObjectName', 'SpecialInstructions', 'Headline', 'Credit',
		'Source', 'EditStatus', 'FixtureIdentifier', 'LocationDest',
		'LocationDestCode', 'Writer', 'JPEGFileComment',
		'iimSupplementalCategory', 'OriginalTransmissionRef', 'Identifier',
		'dc-contributor', 'dc-coverage', 'dc-publisher', 'dc-relation',
		'dc-rights', 'dc-source', 'dc-type', 'Lens', 'SerialNumber',
		'CameraOwnerName', 'Label', 'Nickname', 'RightsCertificate',
		'CopyrightOwner', 'UsageTerms', 'WebStatement',
		'OriginalDocumentID', 'LicenseUrl', 'MorePermissionsUrl',
		'AttributionUrl', 'PreferredAttributionName', 'PNGFileComment',
		'Disclaimer', 'ContentWarning', 'GIFFileComment', 'SceneCode',
		'IntellectualGenre', 'Event', 'OrginisationInImage', 'PersonInImage',
	];

	if ( isset( $knownValues[$tag] ) ) {
		// Loose comparison on purpose: values frequently arrive as numeric
		// strings ('1') and must still match the integer codes, exactly
		// like the switch statements this table replaces.
		if ( in_array( $val, $knownValues[$tag] ) ) {
			$msgTag = isset( $messageTags[$tag] ) ? $messageTags[$tag] : $tag;

			return $this->exifMsg( $msgTag, $val );
		}

		/* If not recognized, display as is. */
		return $val;
	}

	if ( in_array( $tag, $plainTextTags, true ) ) {
		return htmlspecialchars( $val );
	}

	switch ( $tag ) {
		case 'XResolution':
		case 'YResolution':
			if ( $resolutionunit == 2 ) {
				return $this->exifMsg( 'XYResolution', 'i', $this->formatNum( $val ) );
			}
			if ( $resolutionunit == 3 ) {
				return $this->exifMsg( 'XYResolution', 'c', $this->formatNum( $val ) );
			}

			/* If not recognized, display as is. */
			return $val;

		// TODO: YCbCrCoefficients #p27 (see annex E)
		case 'ExifVersion':
		case 'FlashpixVersion':
			// Stored as e.g. "0221"; shown as 2.21. Arithmetic on a
			// non-numeric string raises a warning (PHP 7) or a TypeError
			// (PHP 8), so anything else is displayed as is.
			if ( is_numeric( $val ) ) {
				return $val / 100;
			}

			return $val;

		case 'DateTime':
		case 'DateTimeOriginal':
		case 'DateTimeDigitized':
		case 'DateTimeReleased':
		case 'DateTimeExpires':
		case 'GPSDateStamp':
		case 'dc-date':
		case 'DateTimeMetadata':
			if ( $val == '0000:00:00 00:00:00' || $val == ' : : : : ' ) {
				return $this->msg( 'exif-unknowndate' )->text();
			} elseif ( preg_match(
				'/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D',
				$val
			) ) {
				// Full date.
				$time = wfTimestamp( TS_MW, $val );
				if ( $time && intval( $time ) > 0 ) {
					return $this->getLanguage()->timeanddate( $time );
				}
			} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d)$/D', $val ) ) {
				// No second field. Still format the same
				// since timeanddate doesn't include seconds anyways,
				// but second still available in api
				$time = wfTimestamp( TS_MW, $val . ':00' );
				if ( $time && intval( $time ) > 0 ) {
					return $this->getLanguage()->timeanddate( $time );
				}
			} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) {
				// If only the date but not the time is filled in.
				$time = wfTimestamp( TS_MW, substr( $val, 0, 4 )
					. substr( $val, 5, 2 )
					. substr( $val, 8, 2 )
					. '000000' );
				if ( $time && intval( $time ) > 0 ) {
					return $this->getLanguage()->date( $time );
				}
			}

			// else it will just output $val without formatting it.
			return $val;

		case 'SubjectDistance':
			return $this->exifMsg( $tag, '', $this->formatNum( $val ) );

		case 'Flash':
			$flashDecode = [
				'fired' => $val & 0b00000001,
				'return' => ( $val & 0b00000110 ) >> 1,
				'mode' => ( $val & 0b00011000 ) >> 3,
				'function' => ( $val & 0b00100000 ) >> 5,
				'redeye' => ( $val & 0b01000000 ) >> 6,
				// 'reserved' => ( $val & 0b10000000 ) >> 7,
			];
			$flashMsgs = [];
			# We do not need to handle unknown values since all are used.
			foreach ( $flashDecode as $subTag => $subValue ) {
				# We do not need any message for zeroed values.
				if ( $subTag != 'fired' && $subValue == 0 ) {
					continue;
				}
				$fullTag = $tag . '-' . $subTag;
				$flashMsgs[] = $this->exifMsg( $fullTag, $subValue );
			}

			return $this->getLanguage()->commaList( $flashMsgs );

		case 'GPSAltitude':
			if ( $val < 0 ) {
				return $this->exifMsg( 'GPSAltitude', 'below-sealevel', $this->formatNum( -$val, 3 ) );
			}

			return $this->exifMsg( 'GPSAltitude', 'above-sealevel', $this->formatNum( $val, 3 ) );

		case 'GPSLatitude':
		case 'GPSDestLatitude':
			return $this->formatCoords( $val, 'latitude' );

		case 'GPSLongitude':
		case 'GPSDestLongitude':
			return $this->formatCoords( $val, 'longitude' );

		case 'GPSDOP':
			// See https://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
			if ( $val <= 2 ) {
				$quality = 'excellent';
			} elseif ( $val <= 5 ) {
				$quality = 'good';
			} elseif ( $val <= 10 ) {
				$quality = 'moderate';
			} elseif ( $val <= 20 ) {
				$quality = 'fair';
			} else {
				$quality = 'poor';
			}

			return $this->exifMsg( $tag, $quality, $this->formatNum( $val ) );

		// This is not in the Exif standard, just a special
		// case for our purposes which enables wikis to wikify
		// the make, model and software name to link to their articles.
		case 'Make':
		case 'Model':
			return $this->exifMsg( $tag, '', $val );

		case 'Software':
			if ( is_array( $val ) ) {
				// if it's a software, version array.
				return $this->msg( 'exif-software-version-value', $val[0], $val[1] )->text();
			}

			return $this->exifMsg( $tag, '', $val );

		case 'ExposureTime':
			// Show the pretty fraction as well as decimal version
			return $this->msg( 'exif-exposuretime-format',
				$this->formatFraction( $val ), $this->formatNum( $val ) )->text();

		case 'ISOSpeedRatings':
			// If it's = 65535 that means it's at the
			// limit of the size of Exif::short and
			// is really higher.
			if ( $val == '65535' ) {
				return $this->exifMsg( $tag, 'overflow' );
			}

			return $this->formatNum( $val );

		case 'FNumber':
			return $this->msg( 'exif-fnumber-format',
				$this->formatNum( $val ) )->text();

		case 'FocalLength':
		case 'FocalLengthIn35mmFilm':
			return $this->msg( 'exif-focallength-format',
				$this->formatNum( $val ) )->text();

		case 'MaxApertureValue':
			if ( strpos( $val, '/' ) !== false ) {
				// need to expand this earlier to calculate fNumber
				list( $n, $d ) = explode( '/', $val );
				// A zero denominator is left alone instead of dividing by it.
				if ( is_numeric( $n ) && is_numeric( $d ) && $d != 0 ) {
					$val = $n / $d;
				}
			}
			if ( is_numeric( $val ) ) {
				$fNumber = pow( 2, $val / 2 );

				return $this->msg( 'exif-maxaperturevalue-value',
					$this->formatNum( $val ),
					$this->formatNum( $fNumber, 2 )
				)->text();
			}

			return $val;

		case 'iimCategory':
			// See pg 29 of IPTC photo metadata standard.
			$iimCategories = [
				'ace', 'clj', 'dis', 'fin', 'edu', 'evn', 'hth', 'hum', 'lab',
				'lif', 'pol', 'rel', 'sci', 'soi', 'spo', 'war', 'wea',
			];
			if ( in_array( strtolower( $val ), $iimCategories, true ) ) {
				return $this->exifMsg( 'iimcategory', $val );
			}

			return $val;

		case 'SubjectNewsCode':
			// Essentially like iimCategory.
			// 8 (numeric) digit hierarchical classification.
			// We decode the first 2 digits, which provide a broad category.
			return $this->convertNewsCode( $val );

		case 'Urgency':
			// 1-8 with 1 being highest, 5 normal
			// 0 is reserved, and 9 is 'user-defined'.
			$urgency = '';
			if ( $val == 0 || $val == 9 ) {
				$urgency = 'other';
			} elseif ( $val < 5 && $val >= 1 ) {
				// 1 is included: it is the highest urgency and used to
				// fall through without any message.
				$urgency = 'high';
			} elseif ( $val == 5 ) {
				$urgency = 'normal';
			} elseif ( $val <= 8 && $val > 5 ) {
				$urgency = 'low';
			}

			if ( $urgency !== '' ) {
				return $this->exifMsg( 'urgency', $urgency, $val );
			}

			return $val;

		// Things that have a unit of pixels.
		case 'OriginalImageHeight':
		case 'OriginalImageWidth':
		case 'PixelXDimension':
		case 'PixelYDimension':
		case 'ImageWidth':
		case 'ImageLength':
			return $this->formatNum( $val ) . ' ' . $this->msg( 'unit-pixel' )->text();

		case 'ObjectCycle':
			// Loose comparison, as in the enumerated table above.
			if ( in_array( $val, [ 'a', 'p', 'b' ] ) ) {
				return $this->exifMsg( $tag, $val );
			}

			return htmlspecialchars( $val );

		case 'Rating':
			if ( $val == '-1' ) {
				return $this->exifMsg( $tag, 'rejected' );
			}

			return $this->formatNum( $val );

		case 'LanguageCode':
			$lang = Language::fetchLanguageName( strtolower( $val ), $this->getLanguage()->getCode() );
			if ( $lang ) {
				return htmlspecialchars( $lang );
			}

			return htmlspecialchars( $val );

		default:
			return $this->formatNum( $val );
	}
}
986
987 /**
988 * Flatten an array, using the content language for any messages.
989 *
990 * @param array $vals Array of values
991 * @param string $type Type of array (either lang, ul, ol).
992 * lang = language assoc array with keys being the lang code
993 * ul = unordered list, ol = ordered list
994 * type can also come from the '_type' member of $vals.
995 * @param bool $noHtml If to avoid returning anything resembling HTML.
996 * (Ugly hack for backwards compatibility with old MediaWiki).
997 * @param bool|IContextSource $context
998 * @return string Single value (in wiki-syntax).
999 * @since 1.23
1000 */
public static function flattenArrayContentLang( $vals, $type = 'ul',
	$noHtml = false, $context = false
) {
	global $wgContLang;

	// Start from a fresh formatter, bound to the caller's context if any.
	$formatter = new self;
	if ( $context ) {
		$formatter->setContext( $context );
	}

	// Derive a context from the formatter's current one and switch only
	// its language to the content language, then make the formatter use it.
	$contentLangContext = new DerivativeContext( $formatter->getContext() );
	$contentLangContext->setLanguage( $wgContLang );
	$formatter->setContext( $contentLangContext );

	return $formatter->flattenArrayReal( $vals, $type, $noHtml );
}
1015
1016 /**
1017 * A function to collapse multivalued tags into a single value.
1018 * This turns an array of (for example) authors into a bulleted list.
1019 *
1020 * This is public on the basis it might be useful outside of this class.
1021 *
1022 * @param array $vals Array of values
1023 * @param string $type Type of array (either lang, ul, ol).
1024 * lang = language assoc array with keys being the lang code
1025 * ul = unordered list, ol = ordered list
1026 * type can also come from the '_type' member of $vals.
1027 * @param bool $noHtml If to avoid returning anything resembling HTML.
1028 * (Ugly hack for backwards compatibility with old mediawiki).
1029 * @return string Single value (in wiki-syntax).
1030 * @since 1.23
1031 */
public function flattenArrayReal( $vals, $type = 'ul', $noHtml = false ) {
	// Collapses a multi-valued tag into a single value:
	//  - non-arrays are returned untouched,
	//  - a one-element array (other than type 'lang') yields that element,
	//  - an empty array yields "",
	//  - otherwise a list is built according to $type ('lang', 'ol', 'ul').
	// A '_type' member inside $vals overrides the $type argument.
	if ( !is_array( $vals ) ) {
		return $vals; // do nothing if not an array;
	}

	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	}

	if ( count( $vals ) === 1 && $type !== 'lang' ) {
		// reset() instead of $vals[0]: the only element is not
		// necessarily stored under key 0.
		return reset( $vals );
	}

	if ( count( $vals ) === 0 ) {
		wfDebug( __METHOD__ . " metadata array with 0 elements!\n" );

		return ""; // paranoia. This should never happen
	}

	/* @todo FIXME: This should hide some of the list entries if there are
	 * say more than four. Especially if a field is translated into 20
	 * languages, we don't want to show them all by default
	 */
	switch ( $type ) {
		case 'lang':
			// Display default, followed by ContLang,
			// followed by the rest in no particular
			// order.

			// Todo: hide some items if really long list.

			$content = '';

			$priorityLanguages = $this->getPriorityLanguages();
			$defaultItem = false;
			$defaultLang = false;

			// If default is set, save it for later,
			// as we don't know if it's equal to
			// one of the lang codes. (In xmp
			// you specify the language for a
			// default property by having both
			// a default prop, and one in the language
			// that are identical)
			if ( isset( $vals['x-default'] ) ) {
				$defaultItem = $vals['x-default'];
				unset( $vals['x-default'] );
			}

			foreach ( $priorityLanguages as $pLang ) {
				if ( !isset( $vals[$pLang] ) ) {
					continue;
				}

				// Read the value before the entry is removed below. The
				// single-language return used to read $vals[$pLang] after
				// unset() and therefore produced an empty <span>.
				$item = $vals[$pLang];

				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $pLang ], $item );
				}

				$isDefault = false;
				if ( $item === $defaultItem ) {
					$defaultItem = false;
					$isDefault = true;
				}
				$content .= $this->langItem(
					$item, $pLang,
					$isDefault, $noHtml );

				unset( $vals[$pLang] );
			}

			// Now do the rest.
			foreach ( $vals as $lang => $item ) {
				if ( $item === $defaultItem ) {
					// Remember which language the default is written in;
					// it is emitted once, in front, further down.
					$defaultLang = $lang;
					continue;
				}
				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $lang ], $item );
				}
				$content .= $this->langItem( $item,
					$lang, false, $noHtml );
			}

			if ( $defaultItem !== false ) {
				if ( $this->singleLang ) {
					return $defaultItem;
				}
				$content = $this->langItem( $defaultItem,
					$defaultLang, true, $noHtml ) .
					$content;
			}

			if ( $noHtml ) {
				return $content;
			}

			return '<ul class="metadata-langlist">' .
				$content .
				'</ul>';

		case 'ol':
			if ( $noHtml ) {
				return "\n#" . implode( "\n#", $vals );
			}

			return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';

		case 'ul':
		default:
			if ( $noHtml ) {
				return "\n*" . implode( "\n*", $vals );
			}

			return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
	}
}
1144
1145 /** Helper function for creating lists of translations.
1146 *
1147 * @param string $value Value (this is not escaped)
1148 * @param string $lang Lang code of item or false
1149 * @param bool $default If it is default value.
1150 * @param bool $noHtml If to avoid html (for back-compat)
1151 * @throws MWException
1152 * @return string Language item (Note: despite how this looks, this is
1153 * treated as wikitext, not as HTML).
1154 */
private function langItem( $value, $lang, $default = false, $noHtml = false ) {
	// Builds one entry of a translation list. $value is inserted unescaped.
	// With $noHtml the entry is plain text, otherwise an <li> element.
	// $lang === false is only valid for the default item.
	if ( $lang === false && $default === false ) {
		throw new MWException( '$lang and $default cannot both be false.' );
	}

	if ( $noHtml ) {
		$wrappedValue = $value;
	} else {
		$wrappedValue = '<span class="mw-metadata-lang-value">'
			. $value . '</span>';
	}

	if ( $lang === false ) {
		// Default item without a known language.
		$msg = $this->msg( 'metadata-langitem-default', $wrappedValue );
		if ( $noHtml ) {
			return $msg->text() . "\n\n";
		}

		return '<li class="mw-metadata-lang-default">'
			. $msg->text()
			. "</li>\n";
	}

	$lowLang = strtolower( $lang );
	$langName = Language::fetchLanguageName( $lowLang );
	if ( $langName === '' ) {
		// try just the base language name. (aka en-US -> en ).
		list( $langPrefix ) = explode( '-', $lowLang, 2 );
		$langName = Language::fetchLanguageName( $langPrefix );
		if ( $langName === '' ) {
			// give up.
			$langName = $lang;
		}
	}
	// else we have a language specified

	$msg = $this->msg( 'metadata-langitem', $wrappedValue, $langName, $lang );
	if ( $noHtml ) {
		return '*' . $msg->text();
	}

	// The language code originates from file metadata, so escape it before
	// placing it inside attribute values; a stray quote must not be able to
	// terminate the attribute. Well-formed codes are unaffected.
	$safeLang = htmlspecialchars( $lang, ENT_QUOTES );

	$item = '<li class="mw-metadata-lang-code-'
		. $safeLang;
	if ( $default ) {
		$item .= ' mw-metadata-lang-default';
	}
	$item .= '" lang="' . $safeLang . '">';
	$item .= $msg->text();
	$item .= "</li>\n";

	return $item;
}
1208
1209 /**
1210 * Convenience function for getFormattedData()
1211 *
1212 * @param string $tag The tag name to pass on
1213 * @param string $val The value of the tag
1214 * @param string $arg An argument to pass ($1)
1215 * @param string $arg2 A 2nd argument to pass ($2)
1216 * @return string The text content of "exif-$tag-$val" message in lower case
1217 */
private function exifMsg( $tag, $val, $arg = null, $arg2 = null ) {
	global $wgContLang;

	// An empty value selects the generic "exif-<tag>-value" message.
	$suffix = ( $val === '' ) ? 'value' : $val;

	// Message keys are lower case; lower-casing is done with the content language.
	$messageKey = $wgContLang->lc( "exif-$tag-$suffix" );

	return $this->msg( $messageKey, $arg, $arg2 )->text();
}
1227
/**
 * Format a number for display.
 *
 * - Arrays are formatted element by element (with the same rounding) and
 *   joined with the context language's commaList().
 * - Strings of the form "a/b" are converted to their quotient, unless the
 *   denominator is zero, in which case the fraction text is kept as is.
 * - Everything else is handed to the context language's formatNum(),
 *   rounded first when it is numeric and $round is given.
 *
 * @param mixed $num The value to format
 * @param float|int|bool $round Digits to round to or false.
 * @return mixed The formatted value as returned by the language object
 */
private function formatNum( $num, $round = false ) {
	if ( is_array( $num ) ) {
		$out = [];
		foreach ( $num as $number ) {
			// Pass $round along so list elements are rounded exactly like
			// scalar values would be.
			$out[] = $this->formatNum( $number, $round );
		}

		return $this->getLanguage()->commaList( $out );
	}

	$m = [];
	if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
		if ( $m[2] != 0 ) {
			$newNum = $m[1] / $m[2];
			if ( $round !== false ) {
				$newNum = round( $newNum, $round );
			}
		} else {
			// Cannot divide by zero: fall back to the original fraction text.
			$newNum = $num;
		}

		return $this->getLanguage()->formatNum( $newNum );
	}

	if ( is_numeric( $num ) && $round !== false ) {
		$num = round( $num, $round );
	}

	return $this->getLanguage()->formatNum( $num );
}
1265
/**
 * Format a rational number, reducing the fraction to lowest terms.
 *
 * Input that is not of the form "a/b", or whose numerator and denominator
 * have a greatest common divisor of zero, is passed to formatNum() unchanged.
 *
 * @param mixed $num The value to format
 * @return mixed "numerator/denominator" with both parts run through
 *   formatNum(), or the result of formatNum( $num )
 */
private function formatFraction( $num ) {
	$parts = [];
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		return $this->formatNum( $num );
	}

	$top = intval( $parts[1] );
	$bottom = intval( $parts[2] );
	$divisor = $this->gcd( abs( $top ), $bottom );
	if ( $divisor == 0 ) {
		// Nothing to reduce by (e.g. "0/0"); let formatNum() deal with it.
		return $this->formatNum( $num );
	}

	return $this->formatNum( $top / $divisor ) . '/' . $this->formatNum( $bottom / $divisor );
}
1286
/**
 * Calculate the greatest common divisor of two integers using the
 * iterative form of the Euclidean algorithm.
 *
 * @see https://en.wikipedia.org/wiki/Euclidean_algorithm
 * @param int $a Numerator
 * @param int $b Denominator
 * @return int $a itself when $b is 0, otherwise the last non-zero remainder
 */
private function gcd( $a, $b ) {
	while ( $b != 0 ) {
		// ( a, b ) becomes ( b, a mod b ) until the remainder hits zero.
		list( $a, $b ) = [ $b, $a % $b ];
	}

	return $a;
}
1313
/**
 * Fetch the human readable version of a news code.
 *
 * A news code is an 8 digit code whose first two digits are a general
 * classification; only that classification is translated. Leading zeros
 * are significant, so the code is handled as a string, not an int.
 *
 * @param string $val The 8 digit news code.
 * @return string The human readable form, or $val unchanged when it is not
 *   8 digits or its two-digit prefix is not in the table below
 */
private function convertNewsCode( $val ) {
	if ( !preg_match( '/^\d{8}$/D', $val ) ) {
		// Not a valid news code.
		return $val;
	}

	// Two-digit prefix => suffix of the "exif-iimcategory-*" message.
	$categories = [
		'01' => 'ace',
		'02' => 'clj',
		'03' => 'dis',
		'04' => 'fin',
		'05' => 'edu',
		'06' => 'evn',
		'07' => 'hth',
		'08' => 'hum',
		'09' => 'lab',
		'10' => 'lif',
		'11' => 'pol',
		'12' => 'rel',
		'13' => 'sci',
		'14' => 'soi',
		'15' => 'spo',
		'16' => 'war',
		'17' => 'wea',
	];

	$prefix = substr( $val, 0, 2 );
	if ( !isset( $categories[$prefix] ) ) {
		return $val;
	}

	$catMsg = $this->exifMsg( 'iimcategory', $categories[$prefix] );

	return $this->exifMsg( 'subjectnewscode', '', $val, $catMsg );
}
1392
/**
 * Format a coordinate value, converting a floating point number of degrees
 * into a degree / minute / second representation.
 *
 * @param float|int $coord Signed decimal degrees. Negative values get the
 *   reference letter S (latitude) or W (longitude), others N or E.
 * @param string $type 'latitude' or 'longitude'; for any other value the
 *   reference letter is left empty
 * @return string Text of the 'exif-coordinate-format' message
 */
private function formatCoords( $coord, $type ) {
	$ref = '';
	if ( $coord < 0 ) {
		$nCoord = -$coord;
		if ( $type === 'latitude' ) {
			$ref = 'S';
		} elseif ( $type === 'longitude' ) {
			$ref = 'W';
		}
	} else {
		$nCoord = $coord;
		if ( $type === 'latitude' ) {
			$ref = 'N';
		} elseif ( $type === 'longitude' ) {
			$ref = 'E';
		}
	}

	$deg = floor( $nCoord );
	$min = floor( ( $nCoord - $deg ) * 60.0 );
	$sec = round( ( ( $nCoord - $deg ) - $min / 60 ) * 3600, 2 );

	// Rounding to two decimals can push the seconds up to 60.00 (for example
	// 10.9999999 degrees). Carry the overflow into the minutes, and from
	// there into the degrees, so that 59' 60" is never displayed.
	if ( $sec >= 60 ) {
		$sec -= 60;
		$min += 1;
	}
	if ( $min >= 60 ) {
		$min -= 60;
		$deg += 1;
	}

	$deg = $this->formatNum( $deg );
	$min = $this->formatNum( $min );
	$sec = $this->formatNum( $sec );

	// The raw signed value is passed along as the last message parameter.
	return $this->msg( 'exif-coordinate-format', $deg, $min, $sec, $ref, $coord )->text();
}
1429
/**
 * Format the contact info field into a single value.
 *
 * This function might be called from
 * JpegHandler::convertMetadataVersion which is why it is
 * public.
 *
 * @param array $vals Array with fields of the ContactInfo
 *   struct defined in the IPTC4XMP spec. Or potentially
 *   an array with one element that is a free form text
 *   value from the older iptc iim 1:118 prop.
 * @return string HTML-ish looking wikitext
 * @since 1.23 no longer static
 */
public function collapseContactInfo( $vals ) {
	$structuredKeys = [
		'CiAdrExtadr',
		'CiAdrCity',
		'CiAdrCtry',
		'CiEmailWork',
		'CiTelWork',
		'CiAdrPcode',
		'CiAdrRegion',
		'CiUrlWork',
	];

	$hasSubProperties = false;
	foreach ( $structuredKeys as $structuredKey ) {
		if ( isset( $vals[$structuredKey] ) ) {
			$hasSubProperties = true;
			break;
		}
	}

	if ( !$hasSubProperties ) {
		// No sub-properties at all. This can happen with old iptc data that
		// stored the contact as one free-form text value. The values are run
		// through htmlspecialchars partly for consistency and partly because
		// people often put ">" etc. into the metadata, which must not be
		// interpreted, while urls should still get auto-linked.
		foreach ( $vals as $index => $text ) {
			$vals[$index] = htmlspecialchars( $text );
		}

		return $this->flattenArrayReal( $vals );
	}

	// A real ContactInfo struct. It is unclear whether every field has to be
	// present, so each one falls back to the empty string.
	//
	// Some of the class names resemble those used by hCard, mostly because
	// they are sensible names. This does not (and does not attempt to) emit
	// the hCard microformat, though it might output in the adr microformat.
	$span = function ( $key, $class ) use ( $vals ) {
		if ( !isset( $vals[$key] ) ) {
			return '';
		}

		return '<span class="' . $class . '">'
			. htmlspecialchars( $vals[$key] )
			. '</span>';
	};

	// Todo: the extended address can potentially be multi-line.
	// Need to check how that works in XMP.
	$street = $span( 'CiAdrExtadr', 'extended-address' );
	$city = $span( 'CiAdrCity', 'locality' );
	$country = $span( 'CiAdrCtry', 'country-name' );
	$tel = $span( 'CiTelWork', 'tel' );
	$postal = $span( 'CiAdrPcode', 'postal-code' );
	// Note this is province/state.
	$region = $span( 'CiAdrRegion', 'region' );
	$url = $span( 'CiUrlWork', 'url' );

	$email = '';
	if ( isset( $vals['CiEmailWork'] ) ) {
		$links = [];
		// Multiple addresses may be separated by new lines and/or commas.
		foreach ( explode( "\n", $vals['CiEmailWork'] ) as $line ) {
			foreach ( explode( ',', $line ) as $piece ) {
				$address = trim( $piece );
				if ( $address === ',' || $address === '' ) {
					continue;
				}
				if ( strpos( $address, '<' ) !== false ) {
					// Don't do fancy formatting to
					// "My name" <foo@bar.com> style stuff
					$links[] = $address;
					continue;
				}
				$links[] = '[mailto:'
					. $address
					. ' <span class="email">'
					. $address
					. '</span>]';
			}
		}
		$email = implode( ', ', $links );
	}

	return $this->msg(
		'exif-contact-value',
		$email,
		$url,
		$street,
		$city,
		$region,
		$postal,
		$country,
		$tel
	)->text();
}
1555
/**
 * Get a list of fields that are visible by default.
 *
 * The list is read from the 'metadata-fields' message in the content
 * language: every line starting with "*" contributes one entry, with
 * surrounding whitespace trimmed and the name lower-cased.
 *
 * @return array Lower-cased field names
 * @since 1.23
 */
public static function getVisibleFields() {
	$listText = wfMessage( 'metadata-fields' )->inContentLanguage()->text();

	$fields = [];
	foreach ( explode( "\n", $listText ) as $line ) {
		$found = [];
		if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $found ) ) {
			$fields[] = strtolower( $found[1] );
		}
	}

	return $fields;
}
1575
/**
 * Get an array of extended metadata. (See the imageinfo API for format.)
 *
 * The result is cached under a key built from the language code, the
 * single-language flag and the file's SHA-1. A cached entry is only used
 * if the 'ValidateExtendedMetadataCache' hook accepts it.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
public function fetchExtendedMetadata( File $file ) {
	$cache = ObjectCache::getMainWANInstance();

	// If revision deleted, exit immediately
	if ( $file->isDeleted( File::DELETED_FILE ) ) {
		return [];
	}

	$cacheKey = wfMemcKey(
		'getExtendedMetadata',
		$this->getLanguage()->getCode(),
		(int)$this->singleLang,
		$file->getSha1()
	);

	$cachedValue = $cache->get( $cacheKey );
	$cacheIsValid = $cachedValue
		&& Hooks::run( 'ValidateExtendedMetadataCache', [ $cachedValue['timestamp'], $file ] );
	if ( $cacheIsValid ) {
		return $cachedValue['data'];
	}

	// Cache miss (or rejected entry): rebuild everything.
	// 12 hours for files from a foreign API, 30 days otherwise.
	if ( $file instanceof ForeignAPIFile ) {
		$maxCacheTime = 12 * 60 * 60;
	} else {
		$maxCacheTime = 30 * 24 * 60 * 60;
	}

	$fileMetadata = $this->getExtendedMetadataFromFile( $file );
	$extendedMetadata = $this->getExtendedMetadataFromHook( $file, $fileMetadata, $maxCacheTime );
	if ( $this->singleLang ) {
		$this->resolveMultilangMetadata( $extendedMetadata );
	}
	$this->discardMultipleValues( $extendedMetadata );

	// Make sure the metadata won't break the API when an XML format is used.
	// This is an API-specific function so it would be cleaner to call it from
	// outside fetchExtendedMetadata, but this way we don't need to redo the
	// computation on a cache hit.
	$this->sanitizeArrayForAPI( $extendedMetadata );

	$cache->set(
		$cacheKey,
		[ 'data' => $extendedMetadata, 'timestamp' => wfTimestampNow() ],
		$maxCacheTime
	);

	return $extendedMetadata;
}
1623
/**
 * Get file-based metadata in standardized format.
 *
 * For a ForeignAPIFile the file's own extended metadata is returned (so it
 * might include metadata supplied by extensions on the remote side). For
 * any other file a 'DateTime' entry is built from the file timestamp and,
 * when the file has a title, an 'ObjectName' entry from the title text
 * without its last ".extension" part.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromFile( File $file ) {
	// If this is a remote file accessed via an API request, we already
	// have remote metadata so we just ignore any local one
	if ( $file instanceof ForeignAPIFile ) {
		// In case of error we pretend no metadata - this will get cached.
		// Might or might not be a good idea.
		return $file->getExtendedMetadata() ?: [];
	}

	$source = 'mediawiki-metadata';

	$fileMetadata = [
		// This is modification time, which is close to "upload" time.
		'DateTime' => [
			'value' => wfTimestamp( TS_ISO_8601, $file->getTimestamp() ),
			'source' => $source,
		],
	];

	$title = $file->getTitle();
	if ( !$title ) {
		return $fileMetadata;
	}

	$text = $title->getText();
	$dotPos = strrpos( $text, '.' );
	// A missing dot (false) and a dot at position 0 both keep the full text.
	$fileMetadata['ObjectName'] = [
		'value' => $dotPos ? substr( $text, 0, $dotPos ) : $text,
		'source' => $source,
	];

	return $fileMetadata;
}
1671
/**
 * Get additional metadata from hooks in standardized format.
 *
 * Runs the 'GetExtendedMetadata' hook, then marks every field whose
 * lower-cased name is not listed by getVisibleFields() with an empty
 * 'hidden' entry.
 *
 * @param File $file File to use
 * @param array $extendedMetadata
 * @param int &$maxCacheTime Hook handlers might use this parameter to override cache time
 *
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromHook( File $file, array $extendedMetadata,
	&$maxCacheTime
) {
	$hookArgs = [
		&$extendedMetadata,
		$file,
		$this->getContext(),
		$this->singleLang,
		&$maxCacheTime
	];
	Hooks::run( 'GetExtendedMetadata', $hookArgs );

	$visible = array_flip( self::getVisibleFields() );
	foreach ( array_keys( $extendedMetadata ) as $name ) {
		if ( isset( $visible[strtolower( $name )] ) ) {
			continue;
		}
		$extendedMetadata[$name]['hidden'] = '';
	}

	return $extendedMetadata;
}
1702
/**
 * Turns an XMP-style multilang array into a single value.
 * If the value is not a multilang array, it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * Lookup order: the languages from getPriorityLanguages(), then
 * 'x-default', then whichever entry comes first in the array.
 *
 * @param mixed $value
 * @return mixed Value in best language, null if there were no languages at all
 * @since 1.23
 */
protected function resolveMultilangValue( $value ) {
	$isMultilang = is_array( $value )
		&& isset( $value['_type'] )
		&& $value['_type'] == 'lang';
	if ( !$isMultilang ) {
		// do nothing if not a multilang array
		return $value;
	}

	// Languages best matching user or site settings come first; the
	// default language, if set, is the next choice after them.
	$candidates = $this->getPriorityLanguages();
	$candidates[] = 'x-default';
	foreach ( $candidates as $code ) {
		if ( isset( $value[$code] ) ) {
			return $value[$code];
		}
	}

	// otherwise just return any one language; with no language entries
	// left (which should not happen) null signals the error
	unset( $value['_type'] );

	return $value ? reset( $value ) : null;
}
1742
/**
 * Turns an XMP-style multivalue array into a single value by dropping all but the first
 * value. If the value is not a multivalue array (or a multivalue array inside a multilang
 * array), it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * @param mixed $value
 * @return mixed The value, or the first value if there were multiple ones.
 *   For a multilang array, an array with each entry resolved separately.
 *   null for an empty array.
 * @since 1.25
 */
protected function resolveMultivalueValue( $value ) {
	if ( !is_array( $value ) ) {
		return $value;
	}

	if ( isset( $value['_type'] ) && $value['_type'] === 'lang' ) {
		// if this is a multilang array, process fields separately
		$newValue = [];
		foreach ( $value as $k => $v ) {
			$newValue[$k] = $this->resolveMultivalueValue( $v );
		}

		return $newValue;
	}

	// _type is 'ul' or 'ol' or missing in which case it defaults to 'ul'
	if ( $value === [] ) {
		return null;
	}

	// each() was deprecated in PHP 7.2 and removed in PHP 8.0, so walk the
	// array with reset()/key()/next() instead. A leading '_type' marker is
	// skipped so that the first real item is returned.
	$first = reset( $value );
	if ( key( $value ) === '_type' ) {
		$first = next( $value );
	}

	return $first;
}
1770
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and resolves multi-language values in it.
 *
 * The array is modified in place; anything that is not an array is left
 * alone. Only fields with a 'value' entry are touched.
 *
 * @param array &$metadata
 * @since 1.23
 */
protected function resolveMultilangMetadata( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	foreach ( array_keys( $metadata ) as $name ) {
		if ( !isset( $metadata[$name]['value'] ) ) {
			continue;
		}
		$metadata[$name]['value'] = $this->resolveMultilangValue( $metadata[$name]['value'] );
	}
}
1787
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and turns all fields into single-valued ones by dropping extra values.
 *
 * The array is modified in place; anything that is not an array is left
 * alone. The 'Software' and 'Contact' fields are never touched.
 *
 * @param array &$metadata
 * @since 1.25
 */
protected function discardMultipleValues( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	// Fields with composite values are skipped. They are not particularly
	// interesting and you can get them via the metadata / commonmetadata
	// APIs anyway.
	$compositeFields = [ 'Software', 'Contact' ];

	foreach ( array_keys( $metadata ) as $name ) {
		if ( in_array( $name, $compositeFields, true ) ) {
			continue;
		}
		if ( isset( $metadata[$name]['value'] ) ) {
			$metadata[$name]['value'] = $this->resolveMultivalueValue( $metadata[$name]['value'] );
		}
	}
}
1809
/**
 * Makes sure the given array is a valid API response fragment
 *
 * Every key is run through sanitizeKeyForAPI(); entries whose key changes
 * are re-inserted under the sanitized key and the old key is removed.
 * Nested arrays are processed recursively. Finally, the keys accepted by
 * ApiResult::isMetadataKey() are handed to ApiResult::setPreserveKeysList().
 *
 * The array is modified in place; non-array input is ignored.
 *
 * @param array &$arr
 */
protected function sanitizeArrayForAPI( &$arr ) {
	if ( !is_array( $arr ) ) {
		return;
	}

	// Suffix appended to a sanitized key that already exists in $arr.
	$counter = 1;
	// NOTE(review): $arr is modified (entries added and removed) while it is
	// being iterated by reference. Whether the re-inserted entry is visited
	// again later in this loop depends on PHP's foreach-by-reference
	// semantics - confirm before restructuring this loop.
	foreach ( $arr as $key => &$value ) {
		$sanitizedKey = $this->sanitizeKeyForAPI( $key );
		if ( $sanitizedKey !== $key ) {
			if ( isset( $arr[$sanitizedKey] ) ) {
				// Make the sanitized keys hopefully unique.
				// To make it definitely unique would be too much effort, given that
				// sanitizing is only needed for misformatted metadata anyway, but
				// this at least covers the case when $arr is numeric.
				$sanitizedKey .= $counter;
				++$counter;
			}
			// Move the entry to its sanitized key.
			$arr[$sanitizedKey] = $arr[$key];
			unset( $arr[$key] );
		}
		// Recurse into nested arrays through the loop reference.
		if ( is_array( $value ) ) {
			$this->sanitizeArrayForAPI( $value );
		}
	}

	// Handle API metadata keys (particularly "_type")
	$keys = array_filter( array_keys( $arr ), 'ApiResult::isMetadataKey' );
	if ( $keys ) {
		ApiResult::setPreserveKeysList( $arr, $keys );
	}
}
1845
/**
 * Turns a string into a valid API identifier.
 *
 * Removes every character outside a-z, A-Z, 0-9, "_", ":", "." and "-",
 * then strips leading digits, hyphens and dots. An empty result becomes
 * "_", and the internal keyword "_element" becomes "element".
 *
 * @param string $key
 * @return string
 * @since 1.23
 */
protected function sanitizeKeyForAPI( $key ) {
	// drop all characters which are not valid in an XML tag name
	// a bunch of non-ASCII letters would be valid but probably won't
	// be used so we take the easy way
	// (The range must be A-Z: A-z would also let [ \ ] ^ ` through. The
	// hyphen is escaped so that it cannot be read as a range operator.)
	$key = preg_replace( '/[^a-zA-Z0-9_:.\-]/', '', $key );
	// drop characters which are invalid at the first position
	// (an unescaped hyphen right after \d is rejected by PCRE2 as an
	// invalid range, so it is escaped here as well)
	$key = preg_replace( '/^[\d\-.]+/', '', $key );

	if ( $key === null || $key === '' ) {
		$key = '_';
	}

	// special case for an internal keyword
	if ( $key === '_element' ) {
		$key = 'element';
	}

	return $key;
}
1871
/**
 * Returns a list of languages (first is best) to use when formatting multilang fields,
 * based on user and site preferences.
 *
 * The list starts with the code of the current language, followed by the
 * two lists returned by Language::getFallbacksIncludingSiteLanguage() for
 * that code, in the order they are returned.
 *
 * @return array
 * @since 1.23
 */
protected function getPriorityLanguages() {
	$fallbackGroups =
		Language::getFallbacksIncludingSiteLanguage( $this->getLanguage()->getCode() );
	$ownCode = (array)$this->getLanguage()->getCode();

	return array_merge( $ownCode, $fallbackGroups[0], $fallbackGroups[1] );
}
1889 }