Merge "benchmarks: Convert benchmarkHooks to use Benchmarker"
[lhc/web/wiklou.git] / includes / media / FormatMetadata.php
1 <?php
2 /**
3 * Formatting of image metadata values into human readable form.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @ingroup Media
21 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
22 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff
23 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
24 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
25 * @file
26 */
27 use Wikimedia\Timestamp\TimestampException;
28
29 /**
30 * Format Image metadata values into a human readable form.
31 *
32 * Note lots of these messages use the prefix 'exif' even though
33 * they may not be exif properties. For example 'exif-ImageDescription'
34 * can be the Exif ImageDescription, or it could be the iptc-iim caption
35 * property, or it could be the xmp dc:description property. This
36 * is because these messages should be independent of how the data is
37 * stored, since the user doesn't care if the description is stored in xmp,
38 * exif, etc. only that it's a description. (Additionally many of these properties
39 * are merged together following the MWG standard, such that for example,
40 * exif properties override XMP properties that mean the same thing if
41 * there is a conflict).
42 *
43 * It should perhaps use a prefix like 'metadata' instead, but there
44 * is already a large number of messages using the 'exif' prefix.
45 *
46 * @ingroup Media
47 * @since 1.23 the class extends ContextSource and various formerly-public
48 * internal methods are private
49 */
50 class FormatMetadata extends ContextSource {
51 /**
52 * Only output a single language for multi-language fields
53 * @var bool
54 * @since 1.23
55 */
56 protected $singleLang = false;
57
/**
 * Switch single-language output on or off.
 *
 * The flag is stored in $this->singleLang and is consulted by
 * flattenArrayReal() when it collapses a multi-language ('lang') field.
 *
 * @param bool $val True to output only one language for multi-language fields
 * @since 1.23
 */
public function setSingleLanguage( $val ) {
	$this->singleLang = $val;
}
67
/**
 * Static convenience wrapper around makeFormattedData().
 *
 * Exif (and other metadata) values are frequently "magic" numbers that only
 * make sense once replaced by a textual explanation. This builds a throwaway
 * formatter, optionally binds it to the supplied context, and returns the
 * human readable version of the given tags.
 *
 * This is the usual entry point for this class.
 *
 * @param array $tags The Exif data to format ( as returned by
 *   Exif::getFilteredData() or BitmapMetadataHandler )
 * @param bool|IContextSource $context Context to use (optional); any falsy
 *   value keeps the formatter's own default context
 * @return array
 */
public static function getFormattedData( $tags, $context = false ) {
	$formatter = new FormatMetadata;

	if ( $context ) {
		$formatter->setContext( $context );
	}

	return $formatter->makeFormattedData( $tags );
}
89
/**
 * Numbers given by Exif user agents are often magical, that is they
 * should be replaced by a detailed explanation depending on their
 * value which most of the time are plain integers. This function
 * formats Exif (and other metadata) values into human readable form.
 *
 * Every tag is processed in three steps: the value is wrapped into an array
 * (if it is not one already), each member is formatted according to the tag
 * name, and the array is flattened again with flattenArrayReal().
 * 'ResolutionUnit', 'Width' and 'Height' are consumed and removed.
 *
 * @param array $tags The Exif data to format ( as returned by
 *   Exif::getFilteredData() or BitmapMetadataHandler )
 * @return array The same tags, with formatted values
 * @since 1.23
 */
public function makeFormattedData( $tags ) {
	// Anything other than "2" (inches) is treated as centimetres (3).
	$resolutionunit = !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ? 2 : 3;
	unset( $tags['ResolutionUnit'] );
	// Width and height are for internal use and already available & displayed outside of metadata
	unset( $tags['Width'] );
	unset( $tags['Height'] );

	foreach ( $tags as $tag => &$vals ) {
		// This seems ugly to wrap non-array's in an array just to unwrap again,
		// especially when most of the time it is not an array
		if ( !is_array( $vals ) ) {
			$vals = [ $vals ];
		}

		// _type is a special value to say what array type
		if ( isset( $vals['_type'] ) ) {
			$type = $vals['_type'];
			unset( $vals['_type'] );
		} else {
			$type = 'ul'; // default unordered list.
		}

		// This is done differently as the tag is an array (hour, min, sec).
		if ( $tag === 'GPSTimeStamp' && count( $vals ) === 3 ) {
			$timeStamp = $this->formatGpsTimeStampValue( $vals );
			if ( $timeStamp !== false ) {
				$vals = $timeStamp;
			}
			// On malformed input the (wrapped) raw value is left in place.
			continue;
		}

		// The contact info is a multi-valued field
		// instead of the other props which are single
		// valued (mostly) so handle as a special case.
		if ( $tag === 'Contact' ) {
			$vals = $this->collapseContactInfo( $vals );
			continue;
		}

		foreach ( $vals as $key => $val ) {
			$vals[$key] = $this->formatSingleTagValue( $tag, $val, $resolutionunit );
		}

		// End formatting values, start flattening arrays.
		$vals = $this->flattenArrayReal( $vals, $type );
	}
	// Break the reference so later writes to $vals cannot corrupt the last tag.
	unset( $vals );

	return $tags;
}

/**
 * Format the three rationals of a GPSTimeStamp into a localised time.
 *
 * @param array $parts Hour, minute and second, each as a "numerator/denominator" string
 * @return string|bool Formatted time, or false if any part is not a rational
 *   with a non-zero denominator
 */
private function formatGpsTimeStampValue( $parts ) {
	$h = explode( '/', $parts[0] );
	$m = explode( '/', $parts[1] );
	$s = explode( '/', $parts[2] );

	// this should already be validated when loaded from file, but it could
	// come from a foreign repo, so be paranoid.
	if ( !isset( $h[1] )
		|| !isset( $m[1] )
		|| !isset( $s[1] )
		|| $h[1] == 0
		|| $m[1] == 0
		|| $s[1] == 0
	) {
		return false;
	}

	$formatted = str_pad( intval( $h[0] / $h[1] ), 2, '0', STR_PAD_LEFT )
		. ':' . str_pad( intval( $m[0] / $m[1] ), 2, '0', STR_PAD_LEFT )
		. ':' . str_pad( intval( $s[0] / $s[1] ), 2, '0', STR_PAD_LEFT );

	try {
		// the 1971:01:01 is just a placeholder, and not shown to user.
		$time = wfTimestamp( TS_MW, '1971:01:01 ' . $formatted );
		if ( $time && intval( $time ) > 0 ) {
			$formatted = $this->getLanguage()->time( $time );
		}
	} catch ( TimestampException $e ) {
		// This shouldn't happen, but we've seen bad formats
		// such as 4-digit seconds in the wild.
		// Keep the plain hh:mm:ss string.
	}

	return $formatted;
}

/**
 * Tags whose values are plain enumerations: a value listed here is replaced
 * by the matching "exif-<tag>-<value>" message, any other value is displayed
 * as is.
 *
 * @return array Map of tag name => list of recognised values
 */
private function getEnumeratedTagValues() {
	static $enumerated = [
		'Compression' => [ 1, 2, 3, 4, 5, 6, 7, 8, 32773, 32946, 34712 ],
		'PhotometricInterpretation' => [ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 32803, 34892 ],
		'Orientation' => [ 1, 2, 3, 4, 5, 6, 7, 8 ],
		'PlanarConfiguration' => [ 1, 2 ],
		// TODO: YCbCrSubSampling
		'YCbCrPositioning' => [ 1, 2 ],
		// TODO: YCbCrCoefficients #p27 (see annex E)
		'ColorSpace' => [ 1, 65535 ],
		'ComponentsConfiguration' => [ 0, 1, 2, 3, 4, 5, 6 ],
		'ExposureProgram' => [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ],
		'MeteringMode' => [ 0, 1, 2, 3, 4, 5, 6, 7, 255 ],
		'LightSource' => [
			0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15,
			17, 18, 19, 20, 21, 22, 23, 24, 255
		],
		'FocalPlaneResolutionUnit' => [ 2 ],
		'SensingMethod' => [ 1, 2, 3, 4, 5, 7, 8 ],
		'FileSource' => [ 3 ],
		'SceneType' => [ 1 ],
		'CustomRendered' => [ 0, 1 ],
		'ExposureMode' => [ 0, 1, 2 ],
		'WhiteBalance' => [ 0, 1 ],
		'SceneCaptureType' => [ 0, 1, 2, 3 ],
		'GainControl' => [ 0, 1, 2, 3, 4 ],
		'Contrast' => [ 0, 1, 2 ],
		'Saturation' => [ 0, 1, 2 ],
		'Sharpness' => [ 0, 1, 2 ],
		'SubjectDistanceRange' => [ 0, 1, 2, 3 ],
		// The GPS...Ref values are kept for compatibility, probably won't be reached.
		'GPSLatitudeRef' => [ 'N', 'S' ],
		'GPSDestLatitudeRef' => [ 'N', 'S' ],
		'GPSLongitudeRef' => [ 'E', 'W' ],
		'GPSDestLongitudeRef' => [ 'E', 'W' ],
		'GPSStatus' => [ 'A', 'V' ],
		'GPSMeasureMode' => [ 2, 3 ],
		'GPSTrackRef' => [ 'T', 'M' ],
		'GPSImgDirectionRef' => [ 'T', 'M' ],
		'GPSDestBearingRef' => [ 'T', 'M' ],
		'GPSSpeedRef' => [ 'K', 'M', 'N' ],
		'GPSDestDistanceRef' => [ 'K', 'M', 'N' ],
		'Copyrighted' => [ 'True', 'False' ],
	];

	return $enumerated;
}

/**
 * Format one of the date/time style tags.
 *
 * @param mixed $val Raw value, expected as "YYYY:MM:DD[ HH:MM[:SS]]"
 * @return mixed Localised date (and time), the "unknown date" message, or
 *   $val unchanged when it is in none of the recognised formats
 */
private function formatDateTagValue( $val ) {
	// An unknown date is written with every digit blanked out, leaving only
	// the colons. Accept any amount of blanks so that both the fully padded
	// and a whitespace-collapsed form are recognised.
	if ( $val == '0000:00:00 00:00:00' || preg_match( '/^ *: *: *: *: *$/D', $val ) ) {
		return $this->msg( 'exif-unknowndate' )->text();
	}

	if ( preg_match(
		'/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D',
		$val
	) ) {
		// Full date.
		$time = wfTimestamp( TS_MW, $val );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d)$/D', $val ) ) {
		// No second field. Still format the same
		// since timeanddate doesn't include seconds anyways,
		// but second still available in api
		$time = wfTimestamp( TS_MW, $val . ':00' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) {
		// If only the date but not the time is filled in.
		$time = wfTimestamp( TS_MW, substr( $val, 0, 4 )
			. substr( $val, 5, 2 )
			. substr( $val, 8, 2 )
			. '000000' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->date( $time );
		}
	}

	// else it will just output $val without formatting it.
	return $val;
}

/**
 * Format a single member of a tag's value list according to the tag name.
 *
 * @param string $tag Tag name
 * @param mixed $val Raw value
 * @param int $resolutionunit 2 for inches, 3 for centimetres (applies to X/YResolution)
 * @return mixed Human readable value, or $val itself when it is not recognised
 */
private function formatSingleTagValue( $tag, $val, $resolutionunit ) {
	// Enumerated tags whose message is filed under a different tag name.
	static $messageTagAliases = [
		'GPSLatitudeRef' => 'GPSLatitude',
		'GPSDestLatitudeRef' => 'GPSLatitude',
		'GPSLongitudeRef' => 'GPSLongitude',
		'GPSDestLongitudeRef' => 'GPSLongitude',
		'GPSTrackRef' => 'GPSDirection',
		'GPSImgDirectionRef' => 'GPSDirection',
		'GPSDestBearingRef' => 'GPSDirection',
		'GPSSpeedRef' => 'GPSSpeed',
		'GPSDestDistanceRef' => 'GPSDestDistance',
	];

	$enumerated = $this->getEnumeratedTagValues();
	if ( isset( $enumerated[$tag] ) ) {
		// Deliberately a loose comparison: values may arrive as numeric
		// strings (e.g. "1") as well as integers.
		if ( in_array( $val, $enumerated[$tag] ) ) {
			$msgTag = isset( $messageTagAliases[$tag] ) ? $messageTagAliases[$tag] : $tag;

			return $this->exifMsg( $msgTag, $val );
		}

		/* If not recognized, display as is. */
		return $val;
	}

	switch ( $tag ) {
		case 'XResolution':
		case 'YResolution':
			if ( $resolutionunit == 2 ) {
				return $this->exifMsg( 'XYResolution', 'i', $this->formatNum( $val ) );
			}
			if ( $resolutionunit == 3 ) {
				return $this->exifMsg( 'XYResolution', 'c', $this->formatNum( $val ) );
			}

			/* If not recognized, display as is. */
			return $val;

		case 'ExifVersion':
		case 'FlashpixVersion':
			// e.g. "0220" becomes 2.2
			return "$val" / 100;

		case 'DateTime':
		case 'DateTimeOriginal':
		case 'DateTimeDigitized':
		case 'DateTimeReleased':
		case 'DateTimeExpires':
		case 'GPSDateStamp':
		case 'dc-date':
		case 'DateTimeMetadata':
			return $this->formatDateTagValue( $val );

		case 'SubjectDistance':
			return $this->exifMsg( $tag, '', $this->formatNum( $val ) );

		case 'Flash':
			$flashDecode = [
				'fired' => $val & 0b00000001,
				'return' => ( $val & 0b00000110 ) >> 1,
				'mode' => ( $val & 0b00011000 ) >> 3,
				'function' => ( $val & 0b00100000 ) >> 5,
				'redeye' => ( $val & 0b01000000 ) >> 6,
				// 'reserved' => ( $val & 0b10000000 ) >> 7,
			];
			$flashMsgs = [];
			# We do not need to handle unknown values since all are used.
			foreach ( $flashDecode as $subTag => $subValue ) {
				# We do not need any message for zeroed values.
				if ( $subTag != 'fired' && $subValue == 0 ) {
					continue;
				}
				$fullTag = $tag . '-' . $subTag;
				$flashMsgs[] = $this->exifMsg( $fullTag, $subValue );
			}

			return $this->getLanguage()->commaList( $flashMsgs );

		case 'GPSAltitude':
			if ( $val < 0 ) {
				return $this->exifMsg( 'GPSAltitude', 'below-sealevel', $this->formatNum( -$val, 3 ) );
			}

			return $this->exifMsg( 'GPSAltitude', 'above-sealevel', $this->formatNum( $val, 3 ) );

		case 'GPSLatitude':
		case 'GPSDestLatitude':
			return $this->formatCoords( $val, 'latitude' );

		case 'GPSLongitude':
		case 'GPSDestLongitude':
			return $this->formatCoords( $val, 'longitude' );

		case 'GPSDOP':
			// See https://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
			if ( $val <= 2 ) {
				$quality = 'excellent';
			} elseif ( $val <= 5 ) {
				$quality = 'good';
			} elseif ( $val <= 10 ) {
				$quality = 'moderate';
			} elseif ( $val <= 20 ) {
				$quality = 'fair';
			} else {
				$quality = 'poor';
			}

			return $this->exifMsg( $tag, $quality, $this->formatNum( $val ) );

		// This is not in the Exif standard, just a special
		// case for our purposes which enables wikis to wikify
		// the make, model and software name to link to their articles.
		case 'Make':
		case 'Model':
			return $this->exifMsg( $tag, '', $val );

		case 'Software':
			if ( is_array( $val ) ) {
				// if its a software, version array.
				return $this->msg( 'exif-software-version-value', $val[0], $val[1] )->text();
			}

			return $this->exifMsg( $tag, '', $val );

		case 'ExposureTime':
			// Show the pretty fraction as well as decimal version
			return $this->msg( 'exif-exposuretime-format',
				$this->formatFraction( $val ), $this->formatNum( $val ) )->text();

		case 'ISOSpeedRatings':
			// If its = 65535 that means its at the
			// limit of the size of Exif::short and
			// is really higher.
			if ( $val == '65535' ) {
				return $this->exifMsg( $tag, 'overflow' );
			}

			return $this->formatNum( $val );

		case 'FNumber':
			return $this->msg( 'exif-fnumber-format',
				$this->formatNum( $val ) )->text();

		case 'FocalLength':
		case 'FocalLengthIn35mmFilm':
			return $this->msg( 'exif-focallength-format',
				$this->formatNum( $val ) )->text();

		case 'MaxApertureValue':
			if ( strpos( $val, '/' ) !== false ) {
				// need to expand this earlier to calculate fNumber
				list( $n, $d ) = explode( '/', $val );
				// A zero denominator is left alone so the raw value is shown
				// instead of dividing by zero.
				if ( is_numeric( $n ) && is_numeric( $d ) && $d != 0 ) {
					$val = $n / $d;
				}
			}
			if ( is_numeric( $val ) ) {
				$fNumber = pow( 2, $val / 2 );

				return $this->msg( 'exif-maxaperturevalue-value',
					$this->formatNum( $val ),
					$this->formatNum( $fNumber, 2 )
				)->text();
			}

			return $val;

		case 'iimCategory':
			// See pg 29 of IPTC photo metadata standard.
			$knownCategories = [
				'ace', 'clj', 'dis', 'fin', 'edu', 'evn', 'hth', 'hum', 'lab',
				'lif', 'pol', 'rel', 'sci', 'soi', 'spo', 'war', 'wea',
			];
			if ( in_array( strtolower( $val ), $knownCategories, true ) ) {
				return $this->exifMsg( 'iimcategory', $val );
			}

			return $val;

		case 'SubjectNewsCode':
			// Essentially like iimCategory.
			// 8 (numeric) digit hierarchical
			// classification. We decode the
			// first 2 digits, which provide
			// a broad category.
			return $this->convertNewsCode( $val );

		case 'Urgency':
			// 1-8 with 1 being highest, 5 normal
			// 0 is reserved, and 9 is 'user-defined'.
			$urgency = '';
			if ( $val == 0 || $val == 9 ) {
				$urgency = 'other';
			} elseif ( $val < 5 && $val >= 1 ) {
				// 1 is included: it is the most urgent level.
				$urgency = 'high';
			} elseif ( $val == 5 ) {
				$urgency = 'normal';
			} elseif ( $val <= 8 && $val > 5 ) {
				$urgency = 'low';
			}

			if ( $urgency !== '' ) {
				return $this->exifMsg( 'urgency', $urgency, $val );
			}

			return $val;

		// Things that have a unit of pixels.
		case 'OriginalImageHeight':
		case 'OriginalImageWidth':
		case 'PixelXDimension':
		case 'PixelYDimension':
		case 'ImageWidth':
		case 'ImageLength':
			return $this->formatNum( $val ) . ' ' . $this->msg( 'unit-pixel' )->text();

		// Do not transform fields with pure text.
		// For some languages the formatNum()
		// conversion results to wrong output like
		// foo,bar@example,com or foo٫bar@example٫com.
		// Also some 'numeric' things like Scene codes
		// are included here as we really don't want
		// commas inserted.
		case 'ImageDescription':
		case 'UserComment':
		case 'Artist':
		case 'Copyright':
		case 'RelatedSoundFile':
		case 'ImageUniqueID':
		case 'SpectralSensitivity':
		case 'GPSSatellites':
		case 'GPSVersionID':
		case 'GPSMapDatum':
		case 'Keywords':
		case 'WorldRegionDest':
		case 'CountryDest':
		case 'CountryCodeDest':
		case 'ProvinceOrStateDest':
		case 'CityDest':
		case 'SublocationDest':
		case 'WorldRegionCreated':
		case 'CountryCreated':
		case 'CountryCodeCreated':
		case 'ProvinceOrStateCreated':
		case 'CityCreated':
		case 'SublocationCreated':
		case 'ObjectName':
		case 'SpecialInstructions':
		case 'Headline':
		case 'Credit':
		case 'Source':
		case 'EditStatus':
		case 'FixtureIdentifier':
		case 'LocationDest':
		case 'LocationDestCode':
		case 'Writer':
		case 'JPEGFileComment':
		case 'iimSupplementalCategory':
		case 'OriginalTransmissionRef':
		case 'Identifier':
		case 'dc-contributor':
		case 'dc-coverage':
		case 'dc-publisher':
		case 'dc-relation':
		case 'dc-rights':
		case 'dc-source':
		case 'dc-type':
		case 'Lens':
		case 'SerialNumber':
		case 'CameraOwnerName':
		case 'Label':
		case 'Nickname':
		case 'RightsCertificate':
		case 'CopyrightOwner':
		case 'UsageTerms':
		case 'WebStatement':
		case 'OriginalDocumentID':
		case 'LicenseUrl':
		case 'MorePermissionsUrl':
		case 'AttributionUrl':
		case 'PreferredAttributionName':
		case 'PNGFileComment':
		case 'Disclaimer':
		case 'ContentWarning':
		case 'GIFFileComment':
		case 'SceneCode':
		case 'IntellectualGenre':
		case 'Event':
		case 'OrginisationInImage':
		case 'PersonInImage':
			return htmlspecialchars( $val );

		case 'ObjectCycle':
			// Loose comparison, as for the enumerated tags above.
			if ( in_array( $val, [ 'a', 'p', 'b' ] ) ) {
				return $this->exifMsg( $tag, $val );
			}

			return htmlspecialchars( $val );

		case 'Rating':
			if ( $val == '-1' ) {
				return $this->exifMsg( $tag, 'rejected' );
			}

			return $this->formatNum( $val );

		case 'LanguageCode':
			$lang = Language::fetchLanguageName( strtolower( $val ), $this->getLanguage()->getCode() );

			// Fall back to the raw code when the language is unknown.
			return htmlspecialchars( $lang ?: $val );

		default:
			return $this->formatNum( $val );
	}
}
989
/**
 * Flatten an array, using the content language for any messages.
 *
 * A fresh formatter is created, bound to the given context (if any), and
 * then switched to a derivative of that context whose language is
 * $wgContLang before flattenArrayReal() is called.
 *
 * @param array $vals Array of values
 * @param string $type Type of array (either lang, ul, ol).
 *   lang = language assoc array with keys being the lang code
 *   ul = unordered list, ol = ordered list
 *   type can also come from the '_type' member of $vals.
 * @param bool $noHtml If to avoid returning anything resembling HTML.
 *   (Ugly hack for backwards compatibility with old MediaWiki).
 * @param bool|IContextSource $context Context to derive from (optional)
 * @return string Single value (in wiki-syntax).
 * @since 1.23
 */
public static function flattenArrayContentLang( $vals, $type = 'ul',
	$noHtml = false, $context = false
) {
	global $wgContLang;

	$formatter = new FormatMetadata;
	if ( $context ) {
		$formatter->setContext( $context );
	}

	// Override the language on a derivative context rather than on the
	// context the formatter was given.
	$contentLangContext = new DerivativeContext( $formatter->getContext() );
	$contentLangContext->setLanguage( $wgContLang );
	$formatter->setContext( $contentLangContext );

	return $formatter->flattenArrayReal( $vals, $type, $noHtml );
}
1018
/**
 * A function to collapse multivalued tags into a single value.
 * This turns an array of (for example) authors into a bulleted list.
 *
 * This is public on the basis it might be useful outside of this class.
 *
 * For 'lang' arrays the items are listed in this order: priority languages
 * (see getPriorityLanguages()), the remaining languages, with an unmatched
 * 'x-default' item prepended in front of everything. When single-language
 * mode is on (see setSingleLanguage()) only the first item found in that
 * search order is returned.
 *
 * @param array $vals Array of values
 * @param string $type Type of array (either lang, ul, ol).
 *   lang = language assoc array with keys being the lang code
 *   ul = unordered list, ol = ordered list
 *   type can also come from the '_type' member of $vals.
 * @param bool $noHtml If to avoid returning anything resembling HTML.
 *   (Ugly hack for backwards compatibility with old mediawiki).
 * @return string Single value (in wiki-syntax). A non-array $vals is
 *   returned unchanged.
 * @since 1.23
 */
public function flattenArrayReal( $vals, $type = 'ul', $noHtml = false ) {
	if ( !is_array( $vals ) ) {
		return $vals; // do nothing if not an array;
	}

	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	}

	if ( count( $vals ) === 0 ) {
		wfDebug( __METHOD__ . " metadata array with 0 elements!\n" );

		return ""; // paranoia. This should never happen
	}

	if ( count( $vals ) === 1 && $type !== 'lang' ) {
		// Return the only element, whatever its key is.
		return reset( $vals );
	}

	/* @todo FIXME: This should hide some of the list entries if there are
	 * say more than four. Especially if a field is translated into 20
	 * languages, we don't want to show them all by default
	 */
	switch ( $type ) {
		case 'lang':
			// Display default, followed by ContLang,
			// followed by the rest in no particular
			// order.

			// Todo: hide some items if really long list.

			$content = '';

			$priorityLanguages = $this->getPriorityLanguages();
			$defaultItem = false;
			$defaultLang = false;

			// If default is set, save it for later,
			// as we don't know if it's equal to
			// one of the lang codes. (In xmp
			// you specify the language for a
			// default property by having both
			// a default prop, and one in the language
			// that are identical)
			if ( isset( $vals['x-default'] ) ) {
				$defaultItem = $vals['x-default'];
				unset( $vals['x-default'] );
			}

			foreach ( $priorityLanguages as $pLang ) {
				if ( !isset( $vals[$pLang] ) ) {
					continue;
				}

				// Take the value out before removing it from the list, so it
				// is still available for the single-language return below.
				$item = $vals[$pLang];
				unset( $vals[$pLang] );

				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $pLang ], $item );
				}

				$isDefault = false;
				if ( $item === $defaultItem ) {
					$defaultItem = false;
					$isDefault = true;
				}
				$content .= $this->langItem(
					$item, $pLang,
					$isDefault, $noHtml );
			}

			// Now do the rest.
			foreach ( $vals as $lang => $item ) {
				if ( $item === $defaultItem ) {
					$defaultLang = $lang;
					continue;
				}
				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $lang ], $item );
				}
				$content .= $this->langItem( $item,
					$lang, false, $noHtml );
			}

			if ( $defaultItem !== false ) {
				if ( $this->singleLang ) {
					return $defaultItem;
				}
				$content = $this->langItem( $defaultItem,
					$defaultLang, true, $noHtml ) .
					$content;
			}

			if ( $noHtml ) {
				return $content;
			}

			return '<ul class="metadata-langlist">' .
				$content .
				'</ul>';

		case 'ol':
			if ( $noHtml ) {
				return "\n#" . implode( "\n#", $vals );
			}

			return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';

		case 'ul':
		default:
			if ( $noHtml ) {
				return "\n*" . implode( "\n*", $vals );
			}

			return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
	}
}
1147
/**
 * Build one entry of a list of translations.
 *
 * With HTML enabled the value is wrapped in a
 * <span class="mw-metadata-lang-value"> and the entry is a <li>; with
 * $noHtml the entry is plain wikitext.
 *
 * @param string $value Value (this is not escaped)
 * @param string $lang Lang code of item or false
 * @param bool $default If it is default value.
 * @param bool $noHtml If to avoid html (for back-compat)
 * @throws MWException If both $lang and $default are false
 * @return string Language item (Note: despite how this looks, this is
 *   treated as wikitext, not as HTML).
 */
private function langItem( $value, $lang, $default = false, $noHtml = false ) {
	if ( $lang === false && $default === false ) {
		throw new MWException( '$lang and $default cannot both be false.' );
	}

	$wrappedValue = $noHtml
		? $value
		: '<span class="mw-metadata-lang-value">' . $value . '</span>';

	// A default item without a known language gets its own message.
	if ( $lang === false ) {
		$text = $this->msg( 'metadata-langitem-default', $wrappedValue )->text();

		return $noHtml
			? $text . "\n\n"
			: '<li class="mw-metadata-lang-default">' . $text . "</li>\n";
	}

	// From here on a language code is specified.
	$lowLang = strtolower( $lang );
	$langName = Language::fetchLanguageName( $lowLang );
	if ( $langName === '' ) {
		// try just the base language name. (aka en-US -> en ).
		$langPrefix = explode( '-', $lowLang, 2 )[0];
		$langName = Language::fetchLanguageName( $langPrefix );
	}
	if ( $langName === '' ) {
		// give up and show the code itself.
		$langName = $lang;
	}

	$text = $this->msg( 'metadata-langitem', $wrappedValue, $langName, $lang )->text();
	if ( $noHtml ) {
		return '*' . $text;
	}

	$classes = 'mw-metadata-lang-code-' . $lang;
	if ( $default ) {
		$classes .= ' mw-metadata-lang-default';
	}

	return '<li class="' . $classes . '" lang="' . $lang . '">' . $text . "</li>\n";
}
1211
/**
 * Convenience function for getFormattedData()
 *
 * Looks up the message "exif-$tag-$val". The message key (not the message
 * text) is lower-cased with the content language first; an empty $val
 * selects the "exif-$tag-value" message.
 *
 * @param string $tag The tag name to pass on
 * @param string $val The value of the tag
 * @param string $arg An argument to pass ($1)
 * @param string $arg2 A 2nd argument to pass ($2)
 * @return string The text content of the message
 */
private function exifMsg( $tag, $val, $arg = null, $arg2 = null ) {
	global $wgContLang;

	$suffix = ( $val === '' ) ? 'value' : $val;
	$key = $wgContLang->lc( "exif-$tag-$suffix" );

	return $this->msg( $key, $arg, $arg2 )->text();
}
1230
/**
 * Format a number for display in the current language.
 *
 * - Arrays are formatted element by element (with the same $round) and the
 *   results are joined with the language's commaList().
 * - Strings of the form "a/b" are divided out; when the denominator is 0
 *   the original string is passed through unchanged.
 * - Anything else is rounded if it is numeric and $round was given.
 *
 * @param mixed $num The value to format
 * @param float|int|bool $round Digits to round to or false.
 * @return mixed Whatever the language's formatNum() (or commaList() for
 *  arrays) returns for the processed value
 */
private function formatNum( $num, $round = false ) {
	if ( is_array( $num ) ) {
		$out = [];
		foreach ( $num as $number ) {
			// Pass $round along so list members are rounded like scalars are.
			$out[] = $this->formatNum( $number, $round );
		}

		return $this->getLanguage()->commaList( $out );
	}

	$m = [];
	if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
		if ( $m[2] != 0 ) {
			$newNum = $m[1] / $m[2];
			if ( $round !== false ) {
				$newNum = round( $newNum, $round );
			}
		} else {
			// Division by zero: keep the raw "a/0" text.
			$newNum = $num;
		}

		return $this->getLanguage()->formatNum( $newNum );
	}

	if ( is_numeric( $num ) && $round !== false ) {
		$num = round( $num, $round );
	}

	return $this->getLanguage()->formatNum( $num );
}
1268
/**
 * Format a rational number ("a/b"), reducing the fraction to lowest terms.
 *
 * Input that is not of the form "a/b", or whose numerator and denominator
 * are both zero, is handed to formatNum() unchanged.
 *
 * @param mixed $num The value to format
 * @return mixed The reduced fraction as "n/d" text, or the formatNum() result
 */
private function formatFraction( $num ) {
	$parts = [];
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		return $this->formatNum( $num );
	}

	$top = intval( $parts[1] );
	$bottom = intval( $parts[2] );
	$divisor = $this->gcd( abs( $top ), $bottom );
	if ( $divisor == 0 ) {
		// Only reachable when both parts are zero; nothing to reduce.
		return $this->formatNum( $num );
	}

	return $this->formatNum( $top / $divisor ) . '/' . $this->formatNum( $bottom / $divisor );
}
1289
/**
 * Calculate the greatest common divisor of two integers.
 *
 * Iterative form of the Euclidean algorithm
 * (https://en.wikipedia.org/wiki/Euclidean_algorithm): the pair ( a, b ) is
 * replaced by ( b, a mod b ) until b reaches zero, at which point a holds
 * the result.
 *
 * @param int $a Numerator
 * @param int $b Denominator
 * @return int
 */
private function gcd( $a, $b ) {
	while ( $b != 0 ) {
		list( $a, $b ) = [ $b, $a % $b ];
	}

	return $a;
}
1316
/**
 * Fetch the human readable version of a news code.
 *
 * A news code is an 8 digit code whose first two digits are a general
 * classification; only that classification is translated. Leading 0's are
 * significant, so the code is handled as a string, not an int.
 *
 * @param string $val The 8 digit news code.
 * @return string The human readable form, or $val unchanged when it is not
 *  an 8 digit code or its classification is not known
 */
private function convertNewsCode( $val ) {
	if ( !preg_match( '/^\d{8}$/D', $val ) ) {
		// Not a valid news code.
		return $val;
	}

	// Two digit classification => suffix of the 'iimcategory' message
	$categories = [
		'01' => 'ace',
		'02' => 'clj',
		'03' => 'dis',
		'04' => 'fin',
		'05' => 'edu',
		'06' => 'evn',
		'07' => 'hth',
		'08' => 'hum',
		'09' => 'lab',
		'10' => 'lif',
		'11' => 'pol',
		'12' => 'rel',
		'13' => 'sci',
		'14' => 'soi',
		'15' => 'spo',
		'16' => 'war',
		'17' => 'wea',
	];

	$prefix = substr( $val, 0, 2 );
	if ( !isset( $categories[$prefix] ) ) {
		return $val;
	}

	$catMsg = $this->exifMsg( 'iimcategory', $categories[$prefix] );

	return $this->exifMsg( 'subjectnewscode', '', $val, $catMsg );
}
1395
/**
 * Format a coordinate value, converting a signed decimal number of degrees
 * into a degree / minute / second representation.
 *
 * The sign selects the hemisphere letter: negative values give S (latitude)
 * or W (longitude), everything else N or E. For any other $type the letter
 * is left empty.
 *
 * @param float|int $coord Coordinate in decimal degrees
 * @param string $type 'latitude' or 'longitude' (decides between N/S and E/W)
 * @return string Text of the 'exif-coordinate-format' message
 */
private function formatCoords( $coord, $type ) {
	$ref = '';
	if ( $coord < 0 ) {
		$nCoord = -$coord;
		if ( $type === 'latitude' ) {
			$ref = 'S';
		} elseif ( $type === 'longitude' ) {
			$ref = 'W';
		}
	} else {
		$nCoord = $coord;
		if ( $type === 'latitude' ) {
			$ref = 'N';
		} elseif ( $type === 'longitude' ) {
			$ref = 'E';
		}
	}

	$deg = floor( $nCoord );
	$min = floor( ( $nCoord - $deg ) * 60.0 );
	$sec = round( ( ( $nCoord - $deg ) - $min / 60 ) * 3600, 2 );

	// Rounding the seconds to two decimals can produce exactly 60
	// (e.g. 59.996 -> 60.00). Carry the overflow upwards so the result
	// never reads like 10° 59′ 60″.
	if ( $sec >= 60 ) {
		$sec -= 60;
		$min++;
	}
	if ( $min >= 60 ) {
		$min -= 60;
		$deg++;
	}

	$deg = $this->formatNum( $deg );
	$min = $this->formatNum( $min );
	$sec = $this->formatNum( $sec );

	return $this->msg( 'exif-coordinate-format', $deg, $min, $sec, $ref, $coord )->text();
}
1432
/**
 * Format the contact info field into a single value.
 *
 * This function might be called from
 * JpegHandler::convertMetadataVersion which is why it is
 * public.
 *
 * When none of the known ContactInfo sub-properties is present, every
 * element of $vals is HTML-escaped and the array is flattened with
 * flattenArrayReal(). Otherwise each known sub-property is escaped, wrapped
 * in a <span> with a descriptive class and passed to the
 * 'exif-contact-value' message.
 *
 * @param array $vals Array with fields of the ContactInfo
 *   struct defined in the IPTC4XMP spec. Or potentially
 *   an array with one element that is a free form text
 *   value from the older iptc iim 1:118 prop.
 * @return string HTML-ish looking wikitext
 * @since 1.23 no longer static
 */
public function collapseContactInfo( $vals ) {
	// Sub-properties that are simply escaped and wrapped: field => CSS class.
	// Some of the class names are similar to those used by hCard. This is
	// mostly because they're sensible names. This does not (and does not
	// attempt to) output stuff in the hCard microformat. However it
	// might output in the adr microformat.
	$spanClasses = [
		// Todo: CiAdrExtadr can potentially be multi-line.
		// Need to check how that works in XMP.
		'CiAdrExtadr' => 'extended-address',
		'CiAdrCity' => 'locality',
		'CiAdrCtry' => 'country-name',
		'CiTelWork' => 'tel',
		'CiAdrPcode' => 'postal-code',
		// Note this is province/state.
		'CiAdrRegion' => 'region',
		'CiUrlWork' => 'url',
	];

	$hasSubProperties = isset( $vals['CiEmailWork'] );
	foreach ( $spanClasses as $field => $class ) {
		if ( isset( $vals[$field] ) ) {
			$hasSubProperties = true;
			break;
		}
	}

	if ( !$hasSubProperties ) {
		// We don't have any sub-properties
		// This could happen if its using old
		// iptc that just had this as a free-form
		// text value.
		// Note: We run this through htmlspecialchars
		// partially to be consistent, and partially
		// because people often insert >, etc into
		// the metadata which should not be interpreted
		// but we still want to auto-link urls.
		foreach ( $vals as &$val ) {
			$val = htmlspecialchars( $val );
		}
		// Break the reference so later writes to $val cannot touch $vals.
		unset( $val );

		return $this->flattenArrayReal( $vals );
	}

	// We have a real ContactInfo field.
	// Its unclear if all these fields have to be
	// set, so assume they do not.
	$spans = [];
	foreach ( $spanClasses as $field => $class ) {
		$spans[$field] = isset( $vals[$field] )
			? '<span class="' . $class . '">' . htmlspecialchars( $vals[$field] ) . '</span>'
			: '';
	}

	$email = '';
	if ( isset( $vals['CiEmailWork'] ) ) {
		$emails = [];
		// Have to split multiple emails at commas/new lines.
		foreach ( explode( "\n", $vals['CiEmailWork'] ) as $line ) {
			foreach ( explode( ',', $line ) as $piece ) {
				$finalEmail = trim( $piece );
				if ( $finalEmail === '' ) {
					continue;
				}
				// The visible text is escaped like every other contact
				// field so that markup in the metadata is not interpreted.
				$escapedEmail = htmlspecialchars( $finalEmail );
				if ( strpos( $finalEmail, '<' ) !== false ) {
					// Don't do fancy formatting to
					// "My name" <foo@bar.com> style stuff
					$emails[] = $escapedEmail;
				} else {
					$emails[] = '[mailto:'
						. $finalEmail
						. ' <span class="email">'
						. $escapedEmail
						. '</span>]';
				}
			}
		}
		$email = implode( ', ', $emails );
	}

	return $this->msg(
		'exif-contact-value',
		$email,
		$spans['CiUrlWork'],
		$spans['CiAdrExtadr'],
		$spans['CiAdrCity'],
		$spans['CiAdrRegion'],
		$spans['CiAdrPcode'],
		$spans['CiAdrCtry'],
		$spans['CiTelWork']
	)->text();
}
1558
/**
 * Get a list of fields that are visible by default.
 *
 * The list is read from the 'metadata-fields' message in the content
 * language: every line starting with "*" contributes the rest of the line,
 * trimmed and lower-cased.
 *
 * @return array Lower-cased field names
 * @since 1.23
 */
public static function getVisibleFields() {
	$rawList = wfMessage( 'metadata-fields' )->inContentLanguage()->text();

	$fields = [];
	foreach ( explode( "\n", $rawList ) as $line ) {
		$matches = [];
		if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
			$fields[] = strtolower( $matches[1] );
		}
	}

	return $fields;
}
1578
/**
 * Get an array of extended metadata. (See the imageinfo API for format.)
 *
 * Results are cached per language, single-language mode and file SHA-1.
 * A cached entry is reused only if the ValidateExtendedMetadataCache hook
 * accepts its timestamp.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
public function fetchExtendedMetadata( File $file ) {
	$cache = ObjectCache::getMainWANInstance();

	// If revision deleted, exit immediately
	if ( $file->isDeleted( File::DELETED_FILE ) ) {
		return [];
	}

	$cacheKey = wfMemcKey(
		'getExtendedMetadata',
		$this->getLanguage()->getCode(),
		(int)$this->singleLang,
		$file->getSha1()
	);

	$cached = $cache->get( $cacheKey );
	if ( $cached
		&& Hooks::run( 'ValidateExtendedMetadataCache', [ $cached['timestamp'], $file ] )
	) {
		return $cached['data'];
	}

	// Cache miss (or rejected entry): 12 hours for foreign API files,
	// 30 days for everything else. Hook handlers may change the value.
	if ( $file instanceof ForeignAPIFile ) {
		$maxCacheTime = 60 * 60 * 12;
	} else {
		$maxCacheTime = 60 * 60 * 24 * 30;
	}

	$extendedMetadata = $this->getExtendedMetadataFromHook(
		$file,
		$this->getExtendedMetadataFromFile( $file ),
		$maxCacheTime
	);
	if ( $this->singleLang ) {
		$this->resolveMultilangMetadata( $extendedMetadata );
	}
	$this->discardMultipleValues( $extendedMetadata );

	// Make sure the metadata won't break the API when an XML format is used.
	// This is an API-specific function so it would be cleaner to call it from
	// outside fetchExtendedMetadata, but this way we don't need to redo the
	// computation on a cache hit.
	$this->sanitizeArrayForAPI( $extendedMetadata );

	$cache->set(
		$cacheKey,
		[ 'data' => $extendedMetadata, 'timestamp' => wfTimestampNow() ],
		$maxCacheTime
	);

	return $extendedMetadata;
}
1626
/**
 * Get file-based metadata in standardized format.
 *
 * Note that for a remote file, this might return metadata supplied by extensions.
 *
 * For local files the result holds a 'DateTime' entry built from the file
 * timestamp and, when the file has a title, an 'ObjectName' entry holding
 * the title text up to its last dot.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromFile( File $file ) {
	// If this is a remote file accessed via an API request, we already
	// have remote metadata so we just ignore any local one
	if ( $file instanceof ForeignAPIFile ) {
		// In case of error we pretend no metadata - this will get cached.
		// Might or might not be a good idea.
		return $file->getExtendedMetadata() ?: [];
	}

	$fileMetadata = [
		// This is modification time, which is close to "upload" time.
		'DateTime' => [
			'value' => wfTimestamp( TS_ISO_8601, $file->getTimestamp() ),
			'source' => 'mediawiki-metadata',
		],
	];

	$title = $file->getTitle();
	if ( !$title ) {
		return $fileMetadata;
	}

	$text = $title->getText();
	$dotPos = strrpos( $text, '.' );
	$fileMetadata['ObjectName'] = [
		// No dot, or a dot in the very first position: keep the whole text
		'value' => $dotPos ? substr( $text, 0, $dotPos ) : $text,
		'source' => 'mediawiki-metadata',
	];

	return $fileMetadata;
}
1674
/**
 * Get additional metadata from hooks in standardized format.
 *
 * Runs the GetExtendedMetadata hook on the given metadata and afterwards
 * flags every field that is not listed by getVisibleFields() with an empty
 * 'hidden' entry.
 *
 * @param File $file File to use
 * @param array $extendedMetadata
 * @param int &$maxCacheTime Hook handlers might use this parameter to override cache time
 *
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromHook( File $file, array $extendedMetadata,
	&$maxCacheTime
) {
	Hooks::run( 'GetExtendedMetadata', [
		&$extendedMetadata,
		$file,
		$this->getContext(),
		$this->singleLang,
		&$maxCacheTime
	] );

	$visible = array_flip( self::getVisibleFields() );
	foreach ( array_keys( $extendedMetadata ) as $key ) {
		if ( isset( $visible[strtolower( $key )] ) ) {
			continue;
		}
		$extendedMetadata[$key]['hidden'] = '';
	}

	return $extendedMetadata;
}
1705
/**
 * Turns an XMP-style multilang array into a single value.
 * If the value is not a multilang array, it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * Lookup order: the languages from getPriorityLanguages(), then the
 * 'x-default' entry, then whatever entry comes first.
 *
 * @param mixed $value
 * @return mixed Value in best language, null if there were no languages at all
 * @since 1.23
 */
protected function resolveMultilangValue( $value ) {
	$isMultilang = is_array( $value )
		&& isset( $value['_type'] )
		&& $value['_type'] == 'lang';
	if ( !$isMultilang ) {
		// do nothing if not a multilang array
		return $value;
	}

	// choose the language best matching user or site settings
	foreach ( $this->getPriorityLanguages() as $code ) {
		if ( isset( $value[$code] ) ) {
			return $value[$code];
		}
	}

	// otherwise go with the default language, if set
	if ( isset( $value['x-default'] ) ) {
		return $value['x-default'];
	}

	// otherwise just return any one language; an array that held nothing
	// but the type marker should not happen and is signalled with null
	unset( $value['_type'] );

	return $value ? reset( $value ) : null;
}
1745
/**
 * Turns an XMP-style multivalue array into a single value by dropping all but the first
 * value. If the value is not a multivalue array (or a multivalue array inside a multilang
 * array), it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * Edge cases: an empty array yields null, and an array holding nothing but
 * a '_type' marker yields false.
 *
 * @param mixed $value
 * @return mixed The value, or the first value if there were multiple ones
 * @since 1.25
 */
protected function resolveMultivalueValue( $value ) {
	if ( !is_array( $value ) ) {
		return $value;
	}

	if ( isset( $value['_type'] ) && $value['_type'] === 'lang' ) {
		// if this is a multilang array, process fields separately
		$newValue = [];
		foreach ( $value as $k => $v ) {
			$newValue[$k] = $this->resolveMultivalueValue( $v );
		}
		return $newValue;
	}

	// _type is 'ul' or 'ol' or missing in which case it defaults to 'ul'
	if ( !$value ) {
		return null;
	}

	// Drop the type marker and take the first remaining element. This
	// avoids each(), which is deprecated since PHP 7.2 and gone in PHP 8,
	// and does not depend on where the array's internal pointer stands.
	unset( $value['_type'] );

	return reset( $value );
}
1773
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and resolves multi-language values in it.
 *
 * Only fields that have a 'value' entry are touched; anything that is not
 * an array is left alone.
 *
 * @param array &$metadata Modified in place
 * @since 1.23
 */
protected function resolveMultilangMetadata( &$metadata ) {
	if ( is_array( $metadata ) ) {
		foreach ( array_keys( $metadata ) as $name ) {
			if ( isset( $metadata[$name]['value'] ) ) {
				$metadata[$name]['value'] =
					$this->resolveMultilangValue( $metadata[$name]['value'] );
			}
		}
	}
}
1790
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and turns all fields into single-valued ones by dropping extra values.
 *
 * The 'Software' and 'Contact' fields are left as they are.
 *
 * @param array &$metadata Modified in place
 * @since 1.25
 */
protected function discardMultipleValues( &$metadata ) {
	if ( is_array( $metadata ) ) {
		foreach ( array_keys( $metadata ) as $name ) {
			// we skip some fields which have composite values. They are not particularly
			// interesting and you can get them via the metadata / commonmetadata APIs anyway.
			$isComposite = ( $name === 'Software' || $name === 'Contact' );
			if ( !$isComposite && isset( $metadata[$name]['value'] ) ) {
				$metadata[$name]['value'] =
					$this->resolveMultivalueValue( $metadata[$name]['value'] );
			}
		}
	}
}
1812
/**
 * Makes sure the given array is a valid API response fragment
 *
 * Every key is run through sanitizeKeyForAPI(); keys that come back changed
 * are renamed in place. Nested arrays are processed recursively. Finally,
 * all keys for which ApiResult::isMetadataKey is true are handed to
 * ApiResult::setPreserveKeysList(). Non-array input is left untouched.
 *
 * @param array &$arr Modified in place
 */
protected function sanitizeArrayForAPI( &$arr ) {
	if ( !is_array( $arr ) ) {
		return;
	}

	// Suffix appended to a sanitized key that is already taken
	$counter = 1;
	// NOTE(review): $arr is modified (entries added and removed) while it is
	// being iterated by reference; whether re-added entries are visited again
	// depends on PHP's foreach semantics - confirm before restructuring.
	foreach ( $arr as $key => &$value ) {
		$sanitizedKey = $this->sanitizeKeyForAPI( $key );
		if ( $sanitizedKey !== $key ) {
			if ( isset( $arr[$sanitizedKey] ) ) {
				// Make the sanitized keys hopefully unique.
				// To make it definitely unique would be too much effort, given that
				// sanitizing is only needed for misformatted metadata anyway, but
				// this at least covers the case when $arr is numeric.
				$sanitizedKey .= $counter;
				++$counter;
			}
			// Move the entry to its sanitized key
			$arr[$sanitizedKey] = $arr[$key];
			unset( $arr[$key] );
		}
		// NOTE(review): after a rename, $value still refers to the entry that was
		// just unset rather than to the copy stored under the new key - confirm
		// that the renamed copy still gets sanitized.
		if ( is_array( $value ) ) {
			$this->sanitizeArrayForAPI( $value );
		}
	}

	// Handle API metadata keys (particularly "_type")
	$keys = array_filter( array_keys( $arr ), 'ApiResult::isMetadataKey' );
	if ( $keys ) {
		ApiResult::setPreserveKeysList( $arr, $keys );
	}
}
1848
/**
 * Turns a string into a valid API identifier.
 *
 * Only ASCII letters, digits and the characters "_", ":", "." and "-" are
 * kept, and leading digits, dots and hyphens are stripped. If nothing is
 * left the result is "_". The internal keyword "_element" is mapped to
 * "element".
 *
 * @param string $key
 * @return string
 * @since 1.23
 */
protected function sanitizeKeyForAPI( $key ) {
	// drop all characters which are not valid in an XML tag name
	// a bunch of non-ASCII letters would be valid but probably won't
	// be used so we take the easy way.
	// The upper-case range must be A-Z: "A-z" would also let [ \ ] ^ and `
	// through. The hyphen is escaped so it can never be read as a range.
	$key = preg_replace( '/[^a-zA-Z0-9_:.\-]/', '', $key );
	// drop characters which are invalid at the first position.
	// An unescaped "-" right after \d is rejected as an invalid range by
	// PCRE2 (PHP 7.3+), which made preg_replace() return null here.
	$key = preg_replace( '/^[\d\-.]+/', '', $key );

	if ( $key === null || $key === '' ) {
		$key = '_';
	}

	// special case for an internal keyword
	if ( $key === '_element' ) {
		$key = 'element';
	}

	return $key;
}
1874
/**
 * Returns a list of languages (first is best) to use when formatting multilang fields,
 * based on user and site preferences.
 *
 * The list starts with the code of the current language, followed by the
 * two lists returned by Language::getFallbacksIncludingSiteLanguage().
 *
 * @return array
 * @since 1.23
 */
protected function getPriorityLanguages() {
	$code = $this->getLanguage()->getCode();

	// NOTE(review): presumably [0] is the language's own fallback chain and
	// [1] the site language plus its fallbacks - confirm against Language.
	list( $firstChain, $secondChain ) = Language::getFallbacksIncludingSiteLanguage( $code );

	return array_merge( (array)$code, $firstChain, $secondChain );
}
1892 }