Merge "Let Html::element do the HTML encoding"
[lhc/web/wiklou.git] / includes / media / FormatMetadata.php
1 <?php
2 /**
3 * Formatting of image metadata values into human readable form.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @ingroup Media
21 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
22 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff
23 * @license GPL-2.0-or-later
24 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
25 * @file
26 */
27 use MediaWiki\MediaWikiServices;
28 use Wikimedia\Timestamp\TimestampException;
29
30 /**
31 * Format Image metadata values into a human readable form.
32 *
33 * Note lots of these messages use the prefix 'exif' even though
34 * they may not be exif properties. For example 'exif-ImageDescription'
35 * can be the Exif ImageDescription, or it could be the iptc-iim caption
36 * property, or it could be the xmp dc:description property. This
37 * is because these messages should be independent of how the data is
38 * stored, since the user doesn't care if the description is stored in xmp,
39 * exif, etc., only that it's a description. (Additionally many of these properties
40 * are merged together following the MWG standard, such that for example,
41 * exif properties override XMP properties that mean the same thing if
42 * there is a conflict).
43 *
44 * It should perhaps use a prefix like 'metadata' instead, but there
45 * is already a large number of messages using the 'exif' prefix.
46 *
47 * @ingroup Media
48 * @since 1.23 the class extends ContextSource and various formerly-public
49 * internal methods are private
50 */
51 class FormatMetadata extends ContextSource {
52 /**
53 * Only output a single language for multi-language fields
54 * @var bool
55 * @since 1.23
56 */
57 protected $singleLang = false;
58
59 /**
60 * Trigger only outputting single language for multilanguage fields
61 *
62 * @param bool $val
63 * @since 1.23
64 */
public function setSingleLanguage( $val ) {
	// Only stored here; flattenArrayReal() consults the flag when it
	// collapses a 'lang' typed array.
	$this->singleLang = $val;
}
68
69 /**
70 * Numbers given by Exif user agents are often magical, that is they
71 * should be replaced by a detailed explanation depending on their
72 * value which most of the time are plain integers. This function
73 * formats Exif (and other metadata) values into human readable form.
74 *
75 * This is the usual entry point for this class.
76 *
77 * @param array $tags The Exif data to format ( as returned by
78 * Exif::getFilteredData() or BitmapMetadataHandler )
79 * @param bool|IContextSource $context Context to use (optional)
80 * @return array
81 */
public static function getFormattedData( $tags, $context = false ) {
	// Static convenience wrapper: build a throw-away formatter, optionally
	// bind the caller's context, then delegate to the instance method.
	$formatter = new self;
	if ( $context ) {
		$formatter->setContext( $context );
	}

	return $formatter->makeFormattedData( $tags );
}
90
91 /**
92 * Numbers given by Exif user agents are often magical, that is they
93 * should be replaced by a detailed explanation depending on their
94 * value which most of the time are plain integers. This function
95 * formats Exif (and other metadata) values into human readable form.
96 *
97 * @param array $tags The Exif data to format ( as returned by
98 * Exif::getFilteredData() or BitmapMetadataHandler )
99 * @return array
100 * @since 1.23
101 */
public function makeFormattedData( $tags ) {
	// ResolutionUnit is consumed here rather than displayed: 2 (or absent)
	// selects the 'i' variant of the XYResolution message, anything else 'c'.
	$resolutionunit = !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ? 2 : 3;
	unset( $tags['ResolutionUnit'] );

	foreach ( $tags as $tag => &$vals ) {
		// Wrap scalars so that every tag can be processed as a list of values.
		if ( !is_array( $tags[$tag] ) ) {
			$vals = [ $vals ];
		}

		// _type is a special member saying what kind of array this is.
		if ( isset( $tags[$tag]['_type'] ) ) {
			$type = $tags[$tag]['_type'];
			unset( $vals['_type'] );
		} else {
			$type = 'ul'; // default unordered list.
		}

		// Handled differently because the tag as a whole is an
		// hour/minute/second array rather than a list of independent values.
		if ( $tag == 'GPSTimeStamp' && count( $vals ) === 3 ) {
			$clock = $this->formatGpsTimeStampParts( $vals );
			if ( $clock !== null ) {
				$tags[$tag] = $clock;
			}
			// An invalid timestamp is left exactly as it was supplied.
			continue;
		}

		// The contact info is a multi-valued field, unlike the other
		// (mostly) single-valued properties, so it is collapsed as a whole.
		if ( $tag === 'Contact' ) {
			$vals = $this->collapseContactInfo( $vals );
			continue;
		}

		foreach ( $vals as &$val ) {
			$val = $this->formatTagValue( $tag, $val, $resolutionunit );
		}
		// Break the reference so later writes to $val cannot touch $vals.
		unset( $val );

		// End formatting values, start flattening arrays.
		$vals = $this->flattenArrayReal( $vals, $type );
	}
	// Break the reference into $tags before handing the array back.
	unset( $vals );

	return $tags;
}

/**
 * Turn the three rational parts of a GPSTimeStamp into a display string.
 *
 * The data should already be validated when loaded from a file, but it
 * could come from a foreign repo, so every part is checked before use.
 *
 * @param array $parts Hour, minute and second as "numerator/denominator" strings
 * @return string|null Formatted time, or null if any part is malformed
 */
private function formatGpsTimeStampParts( array $parts ) {
	$clock = [];
	foreach ( [ 0, 1, 2 ] as $index ) {
		if ( !isset( $parts[$index] ) || !is_scalar( $parts[$index] ) ) {
			return null;
		}
		$rational = explode( '/', $parts[$index] );
		if ( !isset( $rational[1] )
			|| !is_numeric( $rational[0] )
			|| !is_numeric( $rational[1] )
			|| $rational[1] == 0
		) {
			return null;
		}
		$clock[] = str_pad( intval( $rational[0] / $rational[1] ), 2, '0', STR_PAD_LEFT );
	}
	$formatted = implode( ':', $clock );

	try {
		// The 1971:01:01 is just a placeholder, and not shown to the user.
		$time = wfTimestamp( TS_MW, '1971:01:01 ' . $formatted );
		if ( $time && intval( $time ) > 0 ) {
			$formatted = $this->getLanguage()->time( $time );
		}
	} catch ( TimestampException $e ) {
		// This shouldn't happen, but bad formats such as 4-digit seconds
		// have been seen in the wild. Keep the hh:mm:ss string as-is.
	}

	return $formatted;
}

/**
 * Look up the enumerated values known for a tag.
 *
 * @param string $tag Tag name
 * @return array|null [ message tag, list of known values ], or null if the
 *   tag is not a plain enumeration
 */
private function getEnumTagDefinition( $tag ) {
	static $definitions = [
		'Compression' => [ 'Compression', [ 1, 2, 3, 4, 5, 6, 7, 8, 32773, 32946, 34712 ] ],
		'PhotometricInterpretation' => [ 'PhotometricInterpretation',
			[ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 32803, 34892 ] ],
		'Orientation' => [ 'Orientation', [ 1, 2, 3, 4, 5, 6, 7, 8 ] ],
		'PlanarConfiguration' => [ 'PlanarConfiguration', [ 1, 2 ] ],
		// TODO: YCbCrSubSampling
		'YCbCrPositioning' => [ 'YCbCrPositioning', [ 1, 2 ] ],
		// TODO: YCbCrCoefficients #p27 (see annex E)
		'ColorSpace' => [ 'ColorSpace', [ 1, 65535 ] ],
		'ComponentsConfiguration' => [ 'ComponentsConfiguration', [ 0, 1, 2, 3, 4, 5, 6 ] ],
		'ExposureProgram' => [ 'ExposureProgram', [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ] ],
		'MeteringMode' => [ 'MeteringMode', [ 0, 1, 2, 3, 4, 5, 6, 7, 255 ] ],
		'LightSource' => [ 'LightSource',
			[ 0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 255 ] ],
		'FocalPlaneResolutionUnit' => [ 'FocalPlaneResolutionUnit', [ 2 ] ],
		'SensingMethod' => [ 'SensingMethod', [ 1, 2, 3, 4, 5, 7, 8 ] ],
		'FileSource' => [ 'FileSource', [ 3 ] ],
		'SceneType' => [ 'SceneType', [ 1 ] ],
		// 0 normal, 1 custom; 2-4 and 6-8 are unofficial Apple additions
		// (HDR, panorama, portrait). Yes, 5 is not present.
		'CustomRendered' => [ 'CustomRendered', [ 0, 1, 2, 3, 4, 6, 7, 8 ] ],
		'ExposureMode' => [ 'ExposureMode', [ 0, 1, 2 ] ],
		'WhiteBalance' => [ 'WhiteBalance', [ 0, 1 ] ],
		'SceneCaptureType' => [ 'SceneCaptureType', [ 0, 1, 2, 3 ] ],
		'GainControl' => [ 'GainControl', [ 0, 1, 2, 3, 4 ] ],
		'Contrast' => [ 'Contrast', [ 0, 1, 2 ] ],
		'Saturation' => [ 'Saturation', [ 0, 1, 2 ] ],
		'Sharpness' => [ 'Sharpness', [ 0, 1, 2 ] ],
		'SubjectDistanceRange' => [ 'SubjectDistanceRange', [ 0, 1, 2, 3 ] ],
		// The GPS...Ref values are kept for compatibility, probably won't be reached.
		'GPSLatitudeRef' => [ 'GPSLatitude', [ 'N', 'S' ] ],
		'GPSDestLatitudeRef' => [ 'GPSLatitude', [ 'N', 'S' ] ],
		'GPSLongitudeRef' => [ 'GPSLongitude', [ 'E', 'W' ] ],
		'GPSDestLongitudeRef' => [ 'GPSLongitude', [ 'E', 'W' ] ],
		'GPSStatus' => [ 'GPSStatus', [ 'A', 'V' ] ],
		'GPSMeasureMode' => [ 'GPSMeasureMode', [ 2, 3 ] ],
		'GPSTrackRef' => [ 'GPSDirection', [ 'T', 'M' ] ],
		'GPSImgDirectionRef' => [ 'GPSDirection', [ 'T', 'M' ] ],
		'GPSDestBearingRef' => [ 'GPSDirection', [ 'T', 'M' ] ],
		'GPSSpeedRef' => [ 'GPSSpeed', [ 'K', 'M', 'N' ] ],
		'GPSDestDistanceRef' => [ 'GPSDestDistance', [ 'K', 'M', 'N' ] ],
		'Copyrighted' => [ 'Copyrighted', [ 'True', 'False' ] ],
	];

	return isset( $definitions[$tag] ) ? $definitions[$tag] : null;
}

/**
 * Format one value of one tag.
 *
 * @param string $tag Tag name
 * @param mixed $val Raw value
 * @param int $resolutionunit 2 or 3, as derived from the ResolutionUnit tag
 * @return mixed Formatted value, or $val unchanged if it is not recognized
 */
private function formatTagValue( $tag, $val, $resolutionunit ) {
	$enum = $this->getEnumTagDefinition( $tag );
	if ( $enum !== null ) {
		list( $msgTag, $knownValues ) = $enum;
		// Deliberately a loose comparison: values may arrive as numeric
		// strings, and this matches what a switch statement would accept.
		if ( in_array( $val, $knownValues ) ) {
			return $this->exifMsg( $msgTag, $val );
		}

		// If not recognized, display as is.
		return $val;
	}

	switch ( $tag ) {
		case 'XResolution':
		case 'YResolution':
			if ( $resolutionunit == 2 ) {
				return $this->exifMsg( 'XYResolution', 'i', $this->formatNum( $val ) );
			}
			if ( $resolutionunit == 3 ) {
				return $this->exifMsg( 'XYResolution', 'c', $this->formatNum( $val ) );
			}

			return $val;

		case 'ExifVersion':
		case 'FlashpixVersion':
			return (int)$val / 100;

		case 'DateTime':
		case 'DateTimeOriginal':
		case 'DateTimeDigitized':
		case 'DateTimeReleased':
		case 'DateTimeExpires':
		case 'GPSDateStamp':
		case 'dc-date':
		case 'DateTimeMetadata':
			return $this->formatDateTimeValue( $val );

		case 'SubjectDistance':
			return $this->exifMsg( $tag, '', $this->formatNum( $val ) );

		case 'Flash':
			return $this->formatFlashValue( $tag, $val );

		case 'GPSAltitude':
			if ( $val < 0 ) {
				return $this->exifMsg( 'GPSAltitude', 'below-sealevel', $this->formatNum( -$val, 3 ) );
			}

			return $this->exifMsg( 'GPSAltitude', 'above-sealevel', $this->formatNum( $val, 3 ) );

		case 'GPSLatitude':
		case 'GPSDestLatitude':
			return $this->formatCoords( $val, 'latitude' );

		case 'GPSLongitude':
		case 'GPSDestLongitude':
			return $this->formatCoords( $val, 'longitude' );

		case 'GPSDOP':
			// See https://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
			if ( $val <= 2 ) {
				$quality = 'excellent';
			} elseif ( $val <= 5 ) {
				$quality = 'good';
			} elseif ( $val <= 10 ) {
				$quality = 'moderate';
			} elseif ( $val <= 20 ) {
				$quality = 'fair';
			} else {
				$quality = 'poor';
			}

			return $this->exifMsg( $tag, $quality, $this->formatNum( $val ) );

		// This is not in the Exif standard, just a special case for our
		// purposes which enables wikis to wikify the make, model and
		// software name to link to their articles.
		case 'Make':
		case 'Model':
			return $this->exifMsg( $tag, '', $val );

		case 'Software':
			if ( !is_array( $val ) ) {
				return $this->exifMsg( $tag, '', $val );
			}
			if ( count( $val ) > 1 ) {
				// A [ software, version ] pair.
				return $this->msg( 'exif-software-version-value', $val[0], $val[1] )->text();
			}

			// https://phabricator.wikimedia.org/T178130
			return $this->exifMsg( $tag, '', $val[0] );

		case 'ExposureTime':
			// Show the pretty fraction as well as the decimal version.
			return $this->msg( 'exif-exposuretime-format',
				$this->formatFraction( $val ), $this->formatNum( $val ) )->text();

		case 'ISOSpeedRatings':
			// 65535 means the value hit the limit of an Exif short and
			// is really higher.
			if ( $val == '65535' ) {
				return $this->exifMsg( $tag, 'overflow' );
			}

			return $this->formatNum( $val );

		case 'FNumber':
			return $this->msg( 'exif-fnumber-format',
				$this->formatNum( $val ) )->text();

		case 'FocalLength':
		case 'FocalLengthIn35mmFilm':
			return $this->msg( 'exif-focallength-format',
				$this->formatNum( $val ) )->text();

		case 'MaxApertureValue':
			if ( strpos( $val, '/' ) !== false ) {
				// The fraction has to be expanded here to calculate the f-number.
				list( $n, $d ) = explode( '/', $val );
				// A zero denominator is left unexpanded instead of dividing by it.
				if ( is_numeric( $n ) && is_numeric( $d ) && $d != 0 ) {
					$val = $n / $d;
				}
			}
			if ( is_numeric( $val ) ) {
				$fNumber = 2 ** ( $val / 2 );

				return $this->msg( 'exif-maxaperturevalue-value',
					$this->formatNum( $val ),
					$this->formatNum( $fNumber, 2 )
				)->text();
			}

			return $val;

		case 'iimCategory':
			// See pg 29 of IPTC photo metadata standard.
			$iimCategories = [
				'ace', 'clj', 'dis', 'fin', 'edu', 'evn', 'hth', 'hum', 'lab',
				'lif', 'pol', 'rel', 'sci', 'soi', 'spo', 'war', 'wea',
			];
			if ( in_array( strtolower( $val ), $iimCategories, true ) ) {
				return $this->exifMsg( 'iimcategory', $val );
			}

			return $val;

		case 'SubjectNewsCode':
			// Essentially like iimCategory: an 8 (numeric) digit
			// hierarchical classification; the helper decodes it.
			return $this->convertNewsCode( $val );

		case 'Urgency':
			// 1-8 with 1 being highest, 5 normal.
			// 0 is reserved, and 9 is 'user-defined'.
			$urgency = '';
			if ( $val == 0 || $val == 9 ) {
				$urgency = 'other';
			} elseif ( $val >= 1 && $val < 5 ) {
				// Includes 1, the highest urgency.
				$urgency = 'high';
			} elseif ( $val == 5 ) {
				$urgency = 'normal';
			} elseif ( $val <= 8 && $val > 5 ) {
				$urgency = 'low';
			}
			if ( $urgency !== '' ) {
				return $this->exifMsg( 'urgency', $urgency, $val );
			}

			return $val;

		// Things that have a unit of pixels.
		case 'OriginalImageHeight':
		case 'OriginalImageWidth':
		case 'PixelXDimension':
		case 'PixelYDimension':
		case 'ImageWidth':
		case 'ImageLength':
			return $this->formatNum( $val ) . ' ' . $this->msg( 'unit-pixel' )->text();

		// Do not transform fields with pure text. For some languages the
		// formatNum() conversion results in wrong output like
		// foo,bar@example,com or foo٫bar@example٫com.
		// Also some 'numeric' things like Scene codes are included here
		// as we really don't want commas inserted.
		case 'ImageDescription':
		case 'UserComment':
		case 'Artist':
		case 'Copyright':
		case 'RelatedSoundFile':
		case 'ImageUniqueID':
		case 'SpectralSensitivity':
		case 'GPSSatellites':
		case 'GPSVersionID':
		case 'GPSMapDatum':
		case 'Keywords':
		case 'WorldRegionDest':
		case 'CountryDest':
		case 'CountryCodeDest':
		case 'ProvinceOrStateDest':
		case 'CityDest':
		case 'SublocationDest':
		case 'WorldRegionCreated':
		case 'CountryCreated':
		case 'CountryCodeCreated':
		case 'ProvinceOrStateCreated':
		case 'CityCreated':
		case 'SublocationCreated':
		case 'ObjectName':
		case 'SpecialInstructions':
		case 'Headline':
		case 'Credit':
		case 'Source':
		case 'EditStatus':
		case 'FixtureIdentifier':
		case 'LocationDest':
		case 'LocationDestCode':
		case 'Writer':
		case 'JPEGFileComment':
		case 'iimSupplementalCategory':
		case 'OriginalTransmissionRef':
		case 'Identifier':
		case 'dc-contributor':
		case 'dc-coverage':
		case 'dc-publisher':
		case 'dc-relation':
		case 'dc-rights':
		case 'dc-source':
		case 'dc-type':
		case 'Lens':
		case 'SerialNumber':
		case 'CameraOwnerName':
		case 'Label':
		case 'Nickname':
		case 'RightsCertificate':
		case 'CopyrightOwner':
		case 'UsageTerms':
		case 'WebStatement':
		case 'OriginalDocumentID':
		case 'LicenseUrl':
		case 'MorePermissionsUrl':
		case 'AttributionUrl':
		case 'PreferredAttributionName':
		case 'PNGFileComment':
		case 'Disclaimer':
		case 'ContentWarning':
		case 'GIFFileComment':
		case 'SceneCode':
		case 'IntellectualGenre':
		case 'Event':
		case 'OrginisationInImage':
		case 'PersonInImage':
			return htmlspecialchars( $val );

		case 'ObjectCycle':
			// Loose comparison on purpose, as for the enumerated tags.
			if ( in_array( $val, [ 'a', 'p', 'b' ] ) ) {
				return $this->exifMsg( $tag, $val );
			}

			return htmlspecialchars( $val );

		case 'Rating':
			if ( $val == '-1' ) {
				return $this->exifMsg( $tag, 'rejected' );
			}

			return $this->formatNum( $val );

		case 'LanguageCode':
			$lang = Language::fetchLanguageName( strtolower( $val ), $this->getLanguage()->getCode() );

			return htmlspecialchars( $lang ?: $val );

		default:
			return $this->formatNum( $val );
	}
}

/**
 * Format a date or date-and-time value in the user's language.
 *
 * @param string $val Value in "YYYY:MM:DD[ HH:MM[:SS]]" form
 * @return string Localized date, the 'exif-unknowndate' message for a
 *   zeroed or blank date, or $val unchanged if it cannot be interpreted
 */
private function formatDateTimeValue( $val ) {
	// An unknown date is stored either zero-filled or with every digit
	// blanked out, leaving only colons and spaces. The blank form is
	// matched by pattern so the exact run of spaces does not matter.
	if ( $val == '0000:00:00 00:00:00' || preg_match( '/^ *: *: *: *: *$/D', $val ) ) {
		return $this->msg( 'exif-unknowndate' )->text();
	}

	if ( preg_match(
		'/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D',
		$val
	) ) {
		// Full date.
		$time = wfTimestamp( TS_MW, $val );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d)$/D', $val ) ) {
		// No second field. Still format the same, since timeanddate
		// doesn't include seconds anyway.
		$time = wfTimestamp( TS_MW, $val . ':00' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) {
		// Only the date but not the time is filled in.
		$time = wfTimestamp( TS_MW, substr( $val, 0, 4 )
			. substr( $val, 5, 2 )
			. substr( $val, 8, 2 )
			. '000000' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->date( $time );
		}
	}

	// Anything else is output without formatting.
	return $val;
}

/**
 * Decode the Flash bit field into a comma-separated list of messages.
 *
 * @param string $tag Tag name, used as the message prefix
 * @param int $val Bit field
 * @return string
 */
private function formatFlashValue( $tag, $val ) {
	$flashDecode = [
		'fired' => $val & 0b00000001,
		'return' => ( $val & 0b00000110 ) >> 1,
		'mode' => ( $val & 0b00011000 ) >> 3,
		'function' => ( $val & 0b00100000 ) >> 5,
		'redeye' => ( $val & 0b01000000 ) >> 6,
		// 'reserved' => ( $val & 0b10000000 ) >> 7,
	];
	$flashMsgs = [];
	// Unknown values need no handling since all bit combinations are used.
	foreach ( $flashDecode as $subTag => $subValue ) {
		// Zeroed sub-fields get no message, except for 'fired'.
		if ( $subTag != 'fired' && $subValue == 0 ) {
			continue;
		}
		$flashMsgs[] = $this->exifMsg( $tag . '-' . $subTag, $subValue );
	}

	return $this->getLanguage()->commaList( $flashMsgs );
}
996
997 /**
998 * Flatten an array, using the content language for any messages.
999 *
1000 * @param array $vals Array of values
1001 * @param string $type Type of array (either lang, ul, ol).
1002 * lang = language assoc array with keys being the lang code
1003 * ul = unordered list, ol = ordered list
1004 * type can also come from the '_type' member of $vals.
1005 * @param bool $noHtml If to avoid returning anything resembling HTML.
1006 * (Ugly hack for backwards compatibility with old MediaWiki).
1007 * @param bool|IContextSource $context
1008 * @return string Single value (in wiki-syntax).
1009 * @since 1.23
1010 */
public static function flattenArrayContentLang( $vals, $type = 'ul',
	$noHtml = false, $context = false
) {
	$formatter = new self;
	if ( $context ) {
		$formatter->setContext( $context );
	}

	// Derive a context that differs only in its language, so that any
	// messages produced while flattening use the content language.
	$contentLangContext = new DerivativeContext( $formatter->getContext() );
	$contentLangContext->setLanguage(
		MediaWikiServices::getInstance()->getContentLanguage()
	);
	$formatter->setContext( $contentLangContext );

	return $formatter->flattenArrayReal( $vals, $type, $noHtml );
}
1024
1025 /**
1026 * A function to collapse multivalued tags into a single value.
1027 * This turns an array of (for example) authors into a bulleted list.
1028 *
1029 * This is public on the basis it might be useful outside of this class.
1030 *
1031 * @param array $vals Array of values
1032 * @param string $type Type of array (either lang, ul, ol).
1033 * lang = language assoc array with keys being the lang code
1034 * ul = unordered list, ol = ordered list
1035 * type can also come from the '_type' member of $vals.
1036 * @param bool $noHtml If to avoid returning anything resembling HTML.
1037 * (Ugly hack for backwards compatibility with old mediawiki).
1038 * @return string Single value (in wiki-syntax).
1039 * @since 1.23
1040 */
public function flattenArrayReal( $vals, $type = 'ul', $noHtml = false ) {
	if ( !is_array( $vals ) ) {
		return $vals; // do nothing if not an array;
	}

	// A '_type' member overrides the $type argument.
	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	}

	if ( count( $vals ) === 1 && $type !== 'lang' && isset( $vals[0] ) ) {
		return $vals[0];
	}
	if ( count( $vals ) === 0 ) {
		wfDebug( __METHOD__ . " metadata array with 0 elements!\n" );

		return ""; // paranoia. This should never happen
	}

	/* @todo FIXME: This should hide some of the list entries if there are
	 * say more than four. Especially if a field is translated into 20
	 * languages, we don't want to show them all by default
	 */
	switch ( $type ) {
		case 'lang':
			// Display default, followed by the priority languages,
			// followed by the rest in no particular order.

			// Todo: hide some items if really long list.

			$content = '';

			$priorityLanguages = $this->getPriorityLanguages();
			$defaultItem = false;
			$defaultLang = false;

			// If default is set, save it for later, as we don't know if
			// it's equal to one of the lang codes. (In xmp you specify
			// the language for a default property by having both a
			// default prop, and one in the language that are identical)
			if ( isset( $vals['x-default'] ) ) {
				$defaultItem = $vals['x-default'];
				unset( $vals['x-default'] );
			}
			foreach ( $priorityLanguages as $pLang ) {
				if ( !isset( $vals[$pLang] ) ) {
					continue;
				}

				// Take a copy before removing the entry: the value is
				// still needed below for the single-language return.
				$item = $vals[$pLang];
				unset( $vals[$pLang] );

				$isDefault = false;
				if ( $item === $defaultItem ) {
					$defaultItem = false;
					$isDefault = true;
				}
				$content .= $this->langItem( $item, $pLang, $isDefault, $noHtml );

				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $pLang ], $item );
				}
			}

			// Now do the rest.
			foreach ( $vals as $lang => $item ) {
				if ( $item === $defaultItem ) {
					// Remember which language the default is in; it is
					// emitted once, at the front, below.
					$defaultLang = $lang;
					continue;
				}
				$content .= $this->langItem( $item,
					$lang, false, $noHtml );
				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $lang ], $item );
				}
			}
			if ( $defaultItem !== false ) {
				$content = $this->langItem( $defaultItem,
					$defaultLang, true, $noHtml ) .
					$content;
				if ( $this->singleLang ) {
					return $defaultItem;
				}
			}
			if ( $noHtml ) {
				return $content;
			}

			return '<ul class="metadata-langlist">' .
				$content .
				'</ul>';
		case 'ol':
			if ( $noHtml ) {
				return "\n#" . implode( "\n#", $vals );
			}

			return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';
		case 'ul':
		default:
			if ( $noHtml ) {
				return "\n*" . implode( "\n*", $vals );
			}

			return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
	}
}
1153
1154 /** Helper function for creating lists of translations.
1155 *
1156 * @param string $value Value (this is not escaped)
1157 * @param string|bool $lang Lang code of item, or false if it has none
1158 * @param bool $default If it is default value.
1159 * @param bool $noHtml If to avoid html (for back-compat)
1160 * @throws MWException
1161 * @return string Language item (Note: despite how this looks, this is
1162 * treated as wikitext, not as HTML).
1163 */
private function langItem( $value, $lang, $default = false, $noHtml = false ) {
	if ( $lang === false && $default === false ) {
		throw new MWException( '$lang and $default cannot both be false.' );
	}

	// $value is inserted as given; only the wrapper markup is added here.
	$wrappedValue = $noHtml
		? $value
		: '<span class="mw-metadata-lang-value">' . $value . '</span>';

	if ( $lang === false ) {
		// A default item whose language is not known.
		$msg = $this->msg( 'metadata-langitem-default', $wrappedValue );
		if ( $noHtml ) {
			return $msg->text() . "\n\n";
		}

		return '<li class="mw-metadata-lang-default">'
			. $msg->text()
			. "</li>\n";
	}

	$lowLang = strtolower( $lang );
	$langName = Language::fetchLanguageName( $lowLang );
	if ( $langName === '' ) {
		// Try just the base language name (aka en-US -> en).
		$langPrefix = explode( '-', $lowLang, 2 )[0];
		$langName = Language::fetchLanguageName( $langPrefix );
		if ( $langName === '' ) {
			// Give up and show the code itself.
			$langName = $lang;
		}
	}

	$msg = $this->msg( 'metadata-langitem', $wrappedValue, $langName, $lang );
	if ( $noHtml ) {
		return '*' . $msg->text();
	}

	// The language code comes from the file's metadata, so escape it
	// before placing it inside attribute values.
	$safeLang = htmlspecialchars( $lang );
	$classes = 'mw-metadata-lang-code-' . $safeLang;
	if ( $default ) {
		$classes .= ' mw-metadata-lang-default';
	}

	return '<li class="' . $classes . '" lang="' . $safeLang . '">'
		. $msg->text()
		. "</li>\n";
}
1217
1218 /**
1219 * Convenience function for getFormattedData()
1220 *
1221 * @param string $tag The tag name to pass on
1222 * @param string $val The value of the tag
1223 * @param string $arg An argument to pass ($1)
1224 * @param string $arg2 A 2nd argument to pass ($2)
1225 * @return string The text of the message whose key is "exif-$tag-$val" in lower case
1226 */
1227 private function exifMsg( $tag, $val, $arg = null, $arg2 = null ) {
1228 if ( $val === '' ) {
1229 $val = 'value';
1230 }
1231
1232 return $this->msg(
1233 MediaWikiServices::getInstance()->getContentLanguage()->lc( "exif-$tag-$val" ),
1234 $arg,
1235 $arg2
1236 )->text();
1237 }
1238
/**
 * Format a number, convert numbers from fractions into floating point
 * numbers, joins arrays of numbers with commas.
 *
 * - Arrays are formatted element by element (with the same $round) and
 *   joined using the language's commaList().
 * - Strings of the form "n/d" are divided out when d is non-zero; a zero
 *   denominator leaves the original string untouched.
 * - Anything else is rounded only if it is numeric, then handed to the
 *   language's formatNum().
 *
 * @param mixed $num The value to format
 * @param float|int|bool $round Digits to round to or false.
 * @return mixed A formatted number or whatever we were fed
 */
private function formatNum( $num, $round = false ) {
	if ( is_array( $num ) ) {
		$out = [];
		foreach ( $num as $number ) {
			// Pass $round along so list elements are rounded exactly like
			// a scalar value would be.
			$out[] = $this->formatNum( $number, $round );
		}

		return $this->getLanguage()->commaList( $out );
	}

	$m = [];
	if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
		if ( $m[2] != 0 ) {
			$newNum = $m[1] / $m[2];
			if ( $round !== false ) {
				$newNum = round( $newNum, $round );
			}
		} else {
			// Division by zero: keep the fraction exactly as it was given.
			$newNum = $num;
		}

		return $this->getLanguage()->formatNum( $newNum );
	}

	if ( is_numeric( $num ) && $round !== false ) {
		$num = round( $num, $round );
	}

	return $this->getLanguage()->formatNum( $num );
}
1276
/**
 * Format a rational number, reducing fractions
 *
 * A string of the form "n/d" is reduced by the greatest common divisor of
 * |n| and d, and both parts are formatted separately. Any other input, or
 * a fraction whose GCD comes out as 0, is passed to formatNum() as is.
 *
 * @param mixed $num The value to format
 * @return mixed The reduced, formatted fraction or whatever formatNum() returns
 */
private function formatFraction( $num ) {
	$parts = [];
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		return $this->formatNum( $num );
	}

	$top = intval( $parts[1] );
	$bottom = intval( $parts[2] );
	$divisor = $this->gcd( abs( $top ), $bottom );
	if ( $divisor == 0 ) {
		// Only reachable for 0/0; nothing to reduce.
		return $this->formatNum( $num );
	}

	return $this->formatNum( $top / $divisor ) . '/' . $this->formatNum( $bottom / $divisor );
}
1297
/**
 * Calculate the greatest common divisor of two integers.
 *
 * Iterative form of the Euclidean algorithm
 * (https://en.wikipedia.org/wiki/Euclidean_algorithm): the pair (a, b) is
 * repeatedly replaced by (b, a mod b) until b reaches 0.
 *
 * @param int $a Numerator
 * @param int $b Denominator
 * @return int
 */
private function gcd( $a, $b ) {
	$larger = $a;
	$smaller = $b;

	while ( $smaller != 0 ) {
		list( $larger, $smaller ) = [ $smaller, $larger % $smaller ];
	}

	return $larger;
}
1324
/**
 * Fetch the human readable version of a news code.
 * A news code is an 8 digit code. The first two
 * digits are a general classification, so we just
 * translate that.
 *
 * Note, leading 0's are significant, so this is
 * a string, not an int.
 *
 * @param string $val The 8 digit news code.
 * @return string The human readable form, or $val unchanged when it is
 *  not 8 digits or its two-digit prefix is not one of 01-17
 */
private function convertNewsCode( $val ) {
	if ( !preg_match( '/^\d{8}$/D', $val ) ) {
		// Not a valid news code.
		return $val;
	}

	// Two-digit prefix => suffix of the "exif-iimcategory-*" message.
	$categories = [
		'01' => 'ace',
		'02' => 'clj',
		'03' => 'dis',
		'04' => 'fin',
		'05' => 'edu',
		'06' => 'evn',
		'07' => 'hth',
		'08' => 'hum',
		'09' => 'lab',
		'10' => 'lif',
		'11' => 'pol',
		'12' => 'rel',
		'13' => 'sci',
		'14' => 'soi',
		'15' => 'spo',
		'16' => 'war',
		'17' => 'wea',
	];

	$prefix = substr( $val, 0, 2 );
	if ( !isset( $categories[$prefix] ) ) {
		// Unknown classification: leave the raw code alone.
		return $val;
	}

	$catMsg = $this->exifMsg( 'iimcategory', $categories[$prefix] );

	return $this->exifMsg( 'subjectnewscode', '', $val, $catMsg );
}
1403
/**
 * Format a coordinate value, convert numbers from floating point
 * into degree minute second representation.
 *
 * The sign of $coord selects the hemisphere letter (N/S for 'latitude',
 * E/W for 'longitude', empty for any other $type); the absolute value is
 * split into whole degrees, whole minutes and seconds rounded to two
 * decimals.
 *
 * @param float|int $coord Coordinate in decimal degrees
 * @param string $type Latitude or longitude (decides between N/S and E/W)
 * @return string Text of the 'exif-coordinate-format' message
 */
private function formatCoords( $coord, $type ) {
	$isNegative = $coord < 0;
	$nCoord = $isNegative ? -$coord : $coord;

	if ( $type === 'latitude' ) {
		$ref = $isNegative ? 'S' : 'N';
	} elseif ( $type === 'longitude' ) {
		$ref = $isNegative ? 'W' : 'E';
	} else {
		$ref = '';
	}

	$wholeDegrees = floor( $nCoord );
	$wholeMinutes = floor( ( $nCoord - $wholeDegrees ) * 60.0 );
	$seconds = round( ( ( $nCoord - $wholeDegrees ) - $wholeMinutes / 60 ) * 3600, 2 );

	return $this->msg(
		'exif-coordinate-format',
		$this->formatNum( $wholeDegrees ),
		$this->formatNum( $wholeMinutes ),
		$this->formatNum( $seconds ),
		$ref,
		$coord
	)->text();
}
1440
/**
 * Format the contact info field into a single value.
 *
 * This function might be called from
 * JpegHandler::convertMetadataVersion which is why it is
 * public.
 *
 * @param array $vals Array with fields of the ContactInfo
 *    struct defined in the IPTC4XMP spec. Or potentially
 *    an array with one element that is a free form text
 *    value from the older iptc iim 1:118 prop.
 * @return string HTML-ish looking wikitext
 * @since 1.23 no longer static
 */
public function collapseContactInfo( $vals ) {
	$knownSubProperties = [
		'CiAdrExtadr',
		'CiAdrCity',
		'CiAdrCtry',
		'CiEmailWork',
		'CiTelWork',
		'CiAdrPcode',
		'CiAdrRegion',
		'CiUrlWork',
	];

	$isStructured = false;
	foreach ( $knownSubProperties as $property ) {
		if ( isset( $vals[$property] ) ) {
			$isStructured = true;
			break;
		}
	}

	if ( !$isStructured ) {
		// No sub-properties at all. This can happen with old IPTC data,
		// where the contact info was just free-form text. The values are
		// still run through htmlspecialchars: partly for consistency, and
		// partly because people often put >, etc. into the metadata, which
		// should not be interpreted (URLs still get auto-linked).
		foreach ( $vals as $index => $rawValue ) {
			$vals[$index] = htmlspecialchars( $rawValue );
		}

		return $this->flattenArrayReal( $vals );
	}

	// We have a real ContactInfo field. It is unclear whether all of the
	// sub-properties have to be set, so every one of them is optional.
	//
	// Some of the class names below resemble those used by hCard, mostly
	// because they are sensible names. This does not (and does not try to)
	// output the hCard microformat, though it might output the adr one.
	//
	// Todo: CiAdrExtadr can potentially be multi-line.
	// Need to check how that works in XMP.
	// Note: CiAdrRegion is province/state.
	$openingTags = [
		'CiAdrExtadr' => '<span class="extended-address">',
		'CiAdrCity' => '<span class="locality">',
		'CiAdrCtry' => '<span class="country-name">',
		'CiTelWork' => '<span class="tel">',
		'CiAdrPcode' => '<span class="postal-code">',
		'CiAdrRegion' => '<span class="region">',
		'CiUrlWork' => '<span class="url">',
	];
	$spans = [];
	foreach ( $openingTags as $property => $openingTag ) {
		if ( isset( $vals[$property] ) ) {
			$spans[$property] = $openingTag
				. htmlspecialchars( $vals[$property] )
				. '</span>';
		} else {
			$spans[$property] = '';
		}
	}

	$email = '';
	if ( isset( $vals['CiEmailWork'] ) ) {
		$links = [];
		// Multiple addresses may be separated by new lines and/or commas.
		foreach ( explode( "\n", $vals['CiEmailWork'] ) as $line ) {
			foreach ( explode( ',', $line ) as $piece ) {
				$address = trim( $piece );
				if ( $address == ',' || $address == '' ) {
					continue;
				}
				if ( strpos( $address, '<' ) !== false ) {
					// Don't do fancy formatting to
					// "My name" <foo@bar.com> style stuff
					$links[] = $address;
					continue;
				}
				$links[] = '[mailto:'
					. $address
					. ' <span class="email">'
					. $address
					. '</span>]';
			}
		}
		$email = implode( ', ', $links );
	}

	return $this->msg(
		'exif-contact-value',
		$email,
		$spans['CiUrlWork'],
		$spans['CiAdrExtadr'],
		$spans['CiAdrCity'],
		$spans['CiAdrRegion'],
		$spans['CiAdrPcode'],
		$spans['CiAdrCtry'],
		$spans['CiTelWork']
	)->text();
}
1566
/**
 * Get a list of fields that are visible by default.
 *
 * Reads the 'metadata-fields' message in the content language and
 * collects the text of every line that starts with '*', lower-cased and
 * with surrounding whitespace removed.
 *
 * @return array Lower-cased field names
 * @since 1.23
 */
public static function getVisibleFields() {
	$messageText = wfMessage( 'metadata-fields' )->inContentLanguage()->text();

	$visible = [];
	foreach ( explode( "\n", $messageText ) as $line ) {
		$found = [];
		if ( !preg_match( '/^\\*\s*(.*?)\s*$/', $line, $found ) ) {
			continue;
		}
		$visible[] = strtolower( $found[1] );
	}

	return $visible;
}
1586
/**
 * Get an array of extended metadata. (See the imageinfo API for format.)
 *
 * Results are stored in the main WAN object cache under a key built from
 * the language code, the singleLang flag and the file's SHA-1. A cached
 * entry is only used if the 'ValidateExtendedMetadataCache' hook accepts it.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
public function fetchExtendedMetadata( File $file ) {
	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();

	// If revision deleted, exit immediately
	if ( $file->isDeleted( File::DELETED_FILE ) ) {
		return [];
	}

	$cacheKey = $cache->makeKey(
		'getExtendedMetadata',
		$this->getLanguage()->getCode(),
		(int)$this->singleLang,
		$file->getSha1()
	);

	$cached = $cache->get( $cacheKey );
	$cacheIsUsable = $cached
		&& Hooks::run( 'ValidateExtendedMetadataCache', [ $cached['timestamp'], $file ] );
	if ( $cacheIsUsable ) {
		return $cached['data'];
	}

	// Cache miss (or rejected entry): recompute. Foreign API files get
	// 12 hours of cache time, everything else 30 days; hook handlers may
	// still change the value through the by-reference parameter.
	$maxCacheTime = 60 * 60 * 24 * 30;
	if ( $file instanceof ForeignAPIFile ) {
		$maxCacheTime = 60 * 60 * 12;
	}

	$extendedMetadata = $this->getExtendedMetadataFromHook(
		$file,
		$this->getExtendedMetadataFromFile( $file ),
		$maxCacheTime
	);
	if ( $this->singleLang ) {
		$this->resolveMultilangMetadata( $extendedMetadata );
	}
	$this->discardMultipleValues( $extendedMetadata );

	// Make sure the metadata won't break the API when an XML format is used.
	// This is an API-specific function so it would be cleaner to call it from
	// outside fetchExtendedMetadata, but this way we don't need to redo the
	// computation on a cache hit.
	$this->sanitizeArrayForAPI( $extendedMetadata );

	$cache->set(
		$cacheKey,
		[ 'data' => $extendedMetadata, 'timestamp' => wfTimestampNow() ],
		$maxCacheTime
	);

	return $extendedMetadata;
}
1634
/**
 * Get file-based metadata in standardized format.
 *
 * Note that for a remote file, this might return metadata supplied by extensions.
 *
 * For local files the result holds 'DateTime' (the file timestamp in
 * ISO 8601) and, when the file has a title, 'ObjectName' (the title text
 * with everything from the last '.' onwards removed).
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromFile( File $file ) {
	// If this is a remote file accessed via an API request, we already
	// have remote metadata so we just ignore any local one
	if ( $file instanceof ForeignAPIFile ) {
		// In case of error we pretend no metadata - this will get cached.
		// Might or might not be a good idea.
		return $file->getExtendedMetadata() ?: [];
	}

	$fileMetadata = [
		// This is modification time, which is close to "upload" time.
		'DateTime' => [
			'value' => wfTimestamp( TS_ISO_8601, $file->getTimestamp() ),
			'source' => 'mediawiki-metadata',
		],
	];

	$title = $file->getTitle();
	if ( !$title ) {
		return $fileMetadata;
	}

	$text = $title->getText();
	$dotPos = strrpos( $text, '.' );
	// A dot at position 0 (or no dot at all) leaves the text untouched.
	$fileMetadata['ObjectName'] = [
		'value' => $dotPos ? substr( $text, 0, $dotPos ) : $text,
		'source' => 'mediawiki-metadata',
	];

	return $fileMetadata;
}
1682
/**
 * Get additional metadata from hooks in standardized format.
 *
 * Runs the 'GetExtendedMetadata' hook and then marks every field whose
 * lower-cased name is not in getVisibleFields() with an empty 'hidden' entry.
 *
 * @param File $file File to use
 * @param array $extendedMetadata
 * @param int &$maxCacheTime Hook handlers might use this parameter to override cache time
 *
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromHook( File $file, array $extendedMetadata,
	&$maxCacheTime
) {
	Hooks::run( 'GetExtendedMetadata', [
		&$extendedMetadata,
		$file,
		$this->getContext(),
		$this->singleLang,
		&$maxCacheTime
	] );

	$visibleLookup = array_flip( self::getVisibleFields() );
	foreach ( $extendedMetadata as $fieldName => $unused ) {
		if ( isset( $visibleLookup[strtolower( $fieldName )] ) ) {
			continue;
		}
		$extendedMetadata[$fieldName]['hidden'] = '';
	}

	return $extendedMetadata;
}
1713
/**
 * Turns an XMP-style multilang array into a single value.
 * If the value is not a multilang array, it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * Selection order: the first priority language that is set, then
 * 'x-default', then whichever entry comes first in the array.
 *
 * @param mixed $value
 * @return mixed Value in best language, null if there were no languages at all
 * @since 1.23
 */
protected function resolveMultilangValue( $value ) {
	$isMultilang = is_array( $value )
		&& isset( $value['_type'] )
		&& $value['_type'] == 'lang';
	if ( !$isMultilang ) {
		// do nothing if not a multilang array
		return $value;
	}

	// choose the language best matching user or site settings
	foreach ( $this->getPriorityLanguages() as $code ) {
		if ( isset( $value[$code] ) ) {
			return $value[$code];
		}
	}

	// otherwise go with the default language, if set
	if ( isset( $value['x-default'] ) ) {
		return $value['x-default'];
	}

	// otherwise just return any one language; an array with no language
	// entries at all should not happen, so null signals the error
	unset( $value['_type'] );

	return $value ? reset( $value ) : null;
}
1753
/**
 * Turns an XMP-style multivalue array into a single value by dropping all but the first
 * value. If the value is not a multivalue array (or a multivalue array inside a multilang
 * array), it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * For a multilang array every entry is reduced separately and the keys are kept.
 * An array without any entry besides '_type' yields false.
 *
 * @param mixed $value
 * @return mixed The value, or the first value if there were multiple ones
 * @since 1.25
 */
protected function resolveMultivalueValue( $value ) {
	if ( !is_array( $value ) ) {
		return $value;
	}

	if ( isset( $value['_type'] ) && $value['_type'] === 'lang' ) {
		// if this is a multilang array, process fields separately
		$perLanguage = [];
		foreach ( $value as $langKey => $langValue ) {
			$perLanguage[$langKey] = $this->resolveMultivalueValue( $langValue );
		}

		return $perLanguage;
	}

	// _type is 'ul' or 'ol' or missing in which case it defaults to 'ul'.
	// Pick the first entry that is not the '_type' marker itself.
	foreach ( $value as $entryKey => $entry ) {
		if ( $entryKey !== '_type' ) {
			return $entry;
		}
	}

	return false;
}
1781
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and resolves multi-language values in it.
 *
 * Only fields that have a 'value' entry are touched; a non-array
 * argument is left alone.
 *
 * @param array &$metadata
 * @since 1.23
 */
protected function resolveMultilangMetadata( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	foreach ( $metadata as $name => $field ) {
		if ( !isset( $field['value'] ) ) {
			continue;
		}
		$metadata[$name]['value'] = $this->resolveMultilangValue( $field['value'] );
	}
}
1798
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and turns all fields into single-valued ones by dropping extra values.
 *
 * The 'Software' and 'Contact' fields are left as they are, and so is a
 * non-array argument.
 *
 * @param array &$metadata
 * @since 1.25
 */
protected function discardMultipleValues( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	// we skip some fields which have composite values. They are not particularly interesting
	// and you can get them via the metadata / commonmetadata APIs anyway.
	$compositeFields = [ 'Software', 'Contact' ];

	foreach ( $metadata as $name => $field ) {
		if ( in_array( $name, $compositeFields, true ) || !isset( $field['value'] ) ) {
			continue;
		}
		$metadata[$name]['value'] = $this->resolveMultivalueValue( $field['value'] );
	}
}
1820
/**
 * Makes sure the given array is a valid API response fragment
 *
 * Every key is passed through sanitizeKeyForAPI(); entries whose key
 * changes are re-inserted under the new key, nested arrays are handled
 * recursively, and keys that ApiResult considers metadata keys are
 * registered as keys to preserve.
 *
 * @param array &$arr
 */
protected function sanitizeArrayForAPI( &$arr ) {
	if ( !is_array( $arr ) ) {
		return;
	}

	$suffix = 1;
	// Iterating by reference is deliberate: entries re-inserted under a
	// sanitized key are appended to the live array and visited later in
	// this same loop, which is when their nested arrays get sanitized.
	foreach ( $arr as $key => &$value ) {
		$cleanKey = $this->sanitizeKeyForAPI( $key );
		if ( $cleanKey !== $key ) {
			if ( isset( $arr[$cleanKey] ) ) {
				// Make the sanitized keys hopefully unique.
				// To make it definitely unique would be too much effort, given that
				// sanitizing is only needed for misformatted metadata anyway, but
				// this at least covers the case when $arr is numeric.
				$cleanKey .= $suffix++;
			}
			$arr[$cleanKey] = $arr[$key];
			unset( $arr[$key] );
		}
		if ( is_array( $value ) ) {
			$this->sanitizeArrayForAPI( $value );
		}
	}

	// Handle API metadata keys (particularly "_type")
	$metadataKeys = array_filter( array_keys( $arr ), 'ApiResult::isMetadataKey' );
	if ( $metadataKeys ) {
		ApiResult::setPreserveKeysList( $arr, $metadataKeys );
	}
}
1856
/**
 * Turns a string into a valid API identifier.
 *
 * Keeps only ASCII letters, digits and the characters _ : . - and then
 * strips any leading run of digits, '-' and '.'. An empty result becomes
 * '_', and the internal keyword '_element' becomes 'element'.
 *
 * @param string $key
 * @return string
 * @since 1.23
 */
protected function sanitizeKeyForAPI( $key ) {
	// drop all characters which are not valid in an XML tag name
	// a bunch of non-ASCII letters would be valid but probably won't
	// be used so we take the easy way.
	// Note the range must be A-Z: A-z would also let [ \ ] ^ and ` through,
	// none of which may appear in an XML name.
	$key = preg_replace( '/[^a-zA-Z0-9_:.\-]/', '', $key );
	// drop characters which are invalid at the first position
	$key = preg_replace( '/^[\d\-.]+/', '', $key );

	if ( $key == '' ) {
		$key = '_';
	}

	// special case for an internal keyword
	if ( $key == '_element' ) {
		$key = 'element';
	}

	return $key;
}
1882
/**
 * Returns a list of languages (first is best) to use when formatting multilang fields,
 * based on user and site preferences.
 *
 * The list starts with the current language code, followed by the two
 * lists returned by Language::getFallbacksIncludingSiteLanguage().
 *
 * @return array
 * @since 1.23
 */
protected function getPriorityLanguages() {
	$code = $this->getLanguage()->getCode();
	list( $firstFallbacks, $secondFallbacks ) =
		Language::getFallbacksIncludingSiteLanguage( $code );

	return array_merge( (array)$code, $firstFallbacks, $secondFallbacks );
}
1900 }