Merge "Add CollationFa"
[lhc/web/wiklou.git] / includes / media / FormatMetadata.php
1 <?php
2 /**
3 * Formatting of image metadata values into human readable form.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @ingroup Media
21 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
22 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff
23 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
24 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
25 * @file
26 */
27
28 /**
29 * Format Image metadata values into a human readable form.
30 *
31 * Note lots of these messages use the prefix 'exif' even though
32 * they may not be exif properties. For example 'exif-ImageDescription'
33 * can be the Exif ImageDescription, or it could be the iptc-iim caption
34 * property, or it could be the xmp dc:description property. This
35 * is because these messages should be independent of how the data is
36 * stored, since the user doesn't care if the description is stored in xmp,
37 * exif, etc., only that it's a description. (Additionally many of these properties
38 * are merged together following the MWG standard, such that for example,
39 * exif properties override XMP properties that mean the same thing if
40 * there is a conflict).
41 *
42 * It should perhaps use a prefix like 'metadata' instead, but there
43 * is already a large number of messages using the 'exif' prefix.
44 *
45 * @ingroup Media
46 * @since 1.23 the class extends ContextSource and various formerly-public
47 * internal methods are private
48 */
49 class FormatMetadata extends ContextSource {
50 /**
51 * Only output a single language for multi-language fields
52 * @var bool
53 * @since 1.23
54 */
55 protected $singleLang = false;
56
/**
 * Switch single-language output on or off.
 *
 * The flag is consulted when flattening multi-language ('lang' type)
 * fields: when it is set, only one language is output instead of the
 * full list of translations.
 *
 * @param bool $val True to output a single language only
 * @since 1.23
 */
public function setSingleLanguage( $val ) {
	$this->singleLang = $val;
}
66
/**
 * Static convenience wrapper around makeFormattedData().
 *
 * Exif (and other metadata) values are frequently bare numeric codes
 * that only make sense once replaced by a description. This builds a
 * formatter, optionally binds it to the given context, and returns the
 * formatted tags.
 *
 * This is the usual entry point for this class.
 *
 * @param array $tags The Exif data to format ( as returned by
 *   Exif::getFilteredData() or BitmapMetadataHandler )
 * @param bool|IContextSource $context Context to use (optional); any
 *   falsy value leaves the formatter's default context untouched
 * @return array
 */
public static function getFormattedData( $tags, $context = false ) {
	$formatter = new self;

	if ( $context ) {
		$formatter->setContext( $context );
	}

	return $formatter->makeFormattedData( $tags );
}
88
/**
 * Numbers given by Exif user agents are often magical, that is they
 * should be replaced by a detailed explanation depending on their
 * value which most of the time are plain integers. This function
 * formats Exif (and other metadata) values into human readable form.
 *
 * Each entry of $tags is formatted independently and replaced by its
 * formatted (and flattened) value. The 'ResolutionUnit' entry is consumed
 * here and removed from the result; it only selects the unit used when
 * formatting XResolution / YResolution.
 *
 * @param array $tags The Exif data to format ( as returned by
 *   Exif::getFilteredData() or BitmapMetadataHandler )
 * @return array The same keys (minus 'ResolutionUnit') with formatted values
 * @since 1.23
 */
public function makeFormattedData( $tags ) {
	// A missing unit, or unit 2, selects the 'i' message variant; anything
	// else is treated as unit 3 (the 'c' variant).
	$resolutionUnit = ( !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ) ? 2 : 3;
	unset( $tags['ResolutionUnit'] );

	// Iterate by value and write back by key, so that no dangling
	// references into $tags outlive the loop.
	foreach ( $tags as $tag => $vals ) {
		$tags[$tag] = $this->formatTagValueList( $tag, $vals, $resolutionUnit );
	}

	return $tags;
}

/**
 * Format every value of a single tag and collapse the result.
 *
 * @param string|int $tag Tag name (array key of the metadata array)
 * @param mixed $vals A single value or an array of values, optionally
 *   carrying a '_type' member naming the list type
 * @param int $resolutionUnit 2 or 3, as computed by makeFormattedData()
 * @return mixed The formatted value; normally a string, but an unparsable
 *   GPSTimeStamp is handed back as the (wrapped) input array
 */
private function formatTagValueList( $tag, $vals, $resolutionUnit ) {
	// Wrap scalars so that everything below can treat $vals as a list.
	if ( !is_array( $vals ) ) {
		$vals = [ $vals ];
	}

	// _type is a special value to say what array type
	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	} else {
		$type = 'ul'; // default unordered list.
	}

	// This is done differently as the tag is an array (hour, min, sec).
	if ( $tag === 'GPSTimeStamp' && count( $vals ) === 3 ) {
		return $this->formatGpsTimeStampValue( $vals );
	}

	// The contact info is a multi-valued field instead of the other
	// props which are single valued (mostly) so handle as a special case.
	if ( $tag === 'Contact' ) {
		return $this->collapseContactInfo( $vals );
	}

	foreach ( $vals as $key => $val ) {
		$vals[$key] = $this->formatSingleTagValue( $tag, $val, $resolutionUnit );
	}

	// End formatting values, start flattening arrays.
	return $this->flattenArrayReal( $vals, $type );
}

/**
 * Format a GPSTimeStamp given as three "numerator/denominator" strings.
 *
 * @param array $vals Hour, minute and second fractions at keys 0, 1, 2
 * @return string|array Localised time, the raw HH:MM:SS string when the
 *   timestamp cannot be converted, or $vals unchanged when any part is
 *   not a valid fraction
 */
private function formatGpsTimeStampValue( array $vals ) {
	$parts = [];
	foreach ( [ 0, 1, 2 ] as $i ) {
		// This should already be validated when loaded from file, but
		// it could come from a foreign repo, so be paranoid.
		if ( !isset( $vals[$i] ) || !is_scalar( $vals[$i] ) ) {
			return $vals;
		}
		$fraction = explode( '/', $vals[$i] );
		if ( !isset( $fraction[1] )
			|| !is_numeric( $fraction[0] )
			|| !is_numeric( $fraction[1] )
			|| $fraction[1] == 0
		) {
			return $vals;
		}
		$parts[] = str_pad( intval( $fraction[0] / $fraction[1] ), 2, '0', STR_PAD_LEFT );
	}
	$formatted = implode( ':', $parts );

	try {
		// the 1971:01:01 is just a placeholder, and not shown to user.
		$time = wfTimestamp( TS_MW, '1971:01:01 ' . $formatted );
		if ( $time && intval( $time ) > 0 ) {
			$formatted = $this->getLanguage()->time( $time );
		}
	} catch ( TimestampException $e ) {
		// This shouldn't happen, but we've seen bad formats
		// such as 4-digit seconds in the wild.
		// leave $formatted as-is
	}

	return $formatted;
}

/**
 * Format one of the date/time style tags.
 *
 * Recognises "YYYY:MM:DD HH:MM:SS", "YYYY:MM:DD HH:MM" and "YYYY:MM:DD";
 * anything else is returned without formatting.
 *
 * @param mixed $val Raw value
 * @return mixed Localised date/time, or $val if it is not recognised
 */
private function formatExifDateValue( $val ) {
	if ( $val == '0000:00:00 00:00:00' || $val == '    :  :     :  :  ' ) {
		return $this->msg( 'exif-unknowndate' )->text();
	}

	if ( preg_match(
		'/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D',
		$val
	) ) {
		// Full date.
		$time = wfTimestamp( TS_MW, $val );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d)$/D', $val ) ) {
		// No second field. Still format the same
		// since timeanddate doesn't include seconds anyways,
		// but second still available in api
		$time = wfTimestamp( TS_MW, $val . ':00' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) {
		// If only the date but not the time is filled in.
		$time = wfTimestamp( TS_MW, substr( $val, 0, 4 )
			. substr( $val, 5, 2 )
			. substr( $val, 8, 2 )
			. '000000' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->date( $time );
		}
	}

	// else it will just output $val without formatting it.
	return $val;
}

/**
 * Tags whose values are codes from a fixed set, each code having its own
 * "exif-<message tag>-<code>" message.
 *
 * @return array Map of tag name => [ message tag, list of known codes ]
 */
private static function getEnumeratedTagValues() {
	static $table = null;

	if ( $table === null ) {
		$table = [
			'Compression' => [ 'Compression', [ 1, 2, 3, 4, 5, 6, 7, 8, 32773, 32946, 34712 ] ],
			'PhotometricInterpretation' => [ 'PhotometricInterpretation',
				[ 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 32803, 34892 ] ],
			'Orientation' => [ 'Orientation', [ 1, 2, 3, 4, 5, 6, 7, 8 ] ],
			'PlanarConfiguration' => [ 'PlanarConfiguration', [ 1, 2 ] ],
			// TODO: YCbCrSubSampling
			'YCbCrPositioning' => [ 'YCbCrPositioning', [ 1, 2 ] ],
			'ColorSpace' => [ 'ColorSpace', [ 1, 65535 ] ],
			'ComponentsConfiguration' => [ 'ComponentsConfiguration', [ 0, 1, 2, 3, 4, 5, 6 ] ],
			'ExposureProgram' => [ 'ExposureProgram', [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ] ],
			'MeteringMode' => [ 'MeteringMode', [ 0, 1, 2, 3, 4, 5, 6, 7, 255 ] ],
			'LightSource' => [ 'LightSource',
				[ 0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 255 ] ],
			'FocalPlaneResolutionUnit' => [ 'FocalPlaneResolutionUnit', [ 2 ] ],
			'SensingMethod' => [ 'SensingMethod', [ 1, 2, 3, 4, 5, 7, 8 ] ],
			'FileSource' => [ 'FileSource', [ 3 ] ],
			'SceneType' => [ 'SceneType', [ 1 ] ],
			'CustomRendered' => [ 'CustomRendered', [ 0, 1 ] ],
			'ExposureMode' => [ 'ExposureMode', [ 0, 1, 2 ] ],
			'WhiteBalance' => [ 'WhiteBalance', [ 0, 1 ] ],
			'SceneCaptureType' => [ 'SceneCaptureType', [ 0, 1, 2, 3 ] ],
			'GainControl' => [ 'GainControl', [ 0, 1, 2, 3, 4 ] ],
			'Contrast' => [ 'Contrast', [ 0, 1, 2 ] ],
			'Saturation' => [ 'Saturation', [ 0, 1, 2 ] ],
			'Sharpness' => [ 'Sharpness', [ 0, 1, 2 ] ],
			'SubjectDistanceRange' => [ 'SubjectDistanceRange', [ 0, 1, 2, 3 ] ],
			// The GPS...Ref values are kept for compatibility, probably won't be reached.
			'GPSLatitudeRef' => [ 'GPSLatitude', [ 'N', 'S' ] ],
			'GPSDestLatitudeRef' => [ 'GPSLatitude', [ 'N', 'S' ] ],
			'GPSLongitudeRef' => [ 'GPSLongitude', [ 'E', 'W' ] ],
			'GPSDestLongitudeRef' => [ 'GPSLongitude', [ 'E', 'W' ] ],
			'GPSStatus' => [ 'GPSStatus', [ 'A', 'V' ] ],
			'GPSMeasureMode' => [ 'GPSMeasureMode', [ 2, 3 ] ],
			'GPSTrackRef' => [ 'GPSDirection', [ 'T', 'M' ] ],
			'GPSImgDirectionRef' => [ 'GPSDirection', [ 'T', 'M' ] ],
			'GPSDestBearingRef' => [ 'GPSDirection', [ 'T', 'M' ] ],
			'GPSSpeedRef' => [ 'GPSSpeed', [ 'K', 'M', 'N' ] ],
			'GPSDestDistanceRef' => [ 'GPSDestDistance', [ 'K', 'M', 'N' ] ],
			'ObjectCycle' => [ 'ObjectCycle', [ 'a', 'p', 'b' ] ],
			'Copyrighted' => [ 'Copyrighted', [ 'True', 'False' ] ],
		];
	}

	return $table;
}

/**
 * Tags holding pure text, which is only HTML-escaped.
 *
 * Do not transform fields with pure text. For some languages the
 * formatNum() conversion results to wrong output like
 * foo,bar@example,com or foo٫bar@example٫com. Also some 'numeric' things
 * like Scene codes are included here as we really don't want commas
 * inserted.
 *
 * @return string[]
 */
private static function getPlainTextTags() {
	return [
		'ImageDescription',
		'UserComment',
		'Artist',
		'Copyright',
		'RelatedSoundFile',
		'ImageUniqueID',
		'SpectralSensitivity',
		'GPSSatellites',
		'GPSVersionID',
		'GPSMapDatum',
		'Keywords',
		'WorldRegionDest',
		'CountryDest',
		'CountryCodeDest',
		'ProvinceOrStateDest',
		'CityDest',
		'SublocationDest',
		'WorldRegionCreated',
		'CountryCreated',
		'CountryCodeCreated',
		'ProvinceOrStateCreated',
		'CityCreated',
		'SublocationCreated',
		'ObjectName',
		'SpecialInstructions',
		'Headline',
		'Credit',
		'Source',
		'EditStatus',
		'FixtureIdentifier',
		'LocationDest',
		'LocationDestCode',
		'Writer',
		'JPEGFileComment',
		'iimSupplementalCategory',
		'OriginalTransmissionRef',
		'Identifier',
		'dc-contributor',
		'dc-coverage',
		'dc-publisher',
		'dc-relation',
		'dc-rights',
		'dc-source',
		'dc-type',
		'Lens',
		'SerialNumber',
		'CameraOwnerName',
		'Label',
		'Nickname',
		'RightsCertificate',
		'CopyrightOwner',
		'UsageTerms',
		'WebStatement',
		'OriginalDocumentID',
		'LicenseUrl',
		'MorePermissionsUrl',
		'AttributionUrl',
		'PreferredAttributionName',
		'PNGFileComment',
		'Disclaimer',
		'ContentWarning',
		'GIFFileComment',
		'SceneCode',
		'IntellectualGenre',
		'Event',
		'OrginisationInImage',
		'PersonInImage',
	];
}

/**
 * Format one value of one tag.
 *
 * @param string|int $tag Tag name
 * @param mixed $val Raw value
 * @param int $resolutionUnit 2 or 3, as computed by makeFormattedData()
 * @return mixed Formatted value; unrecognised codes are returned as is
 */
private function formatSingleTagValue( $tag, $val, $resolutionUnit ) {
	// Non-string keys cannot name a known tag. Handle them up front so a
	// numeric key can never loosely match one of the string cases below.
	if ( !is_string( $tag ) ) {
		return $this->formatNum( $val );
	}

	$enumerated = self::getEnumeratedTagValues();
	if ( isset( $enumerated[$tag] ) ) {
		list( $msgTag, $knownCodes ) = $enumerated[$tag];
		// Deliberately loose: codes arrive as ints or as numeric strings.
		if ( in_array( $val, $knownCodes ) ) {
			return $this->exifMsg( $msgTag, $val );
		}

		/* If not recognized, display as is (ObjectCycle is free text, so escape it). */
		return $tag === 'ObjectCycle' ? htmlspecialchars( $val ) : $val;
	}

	if ( in_array( $tag, self::getPlainTextTags(), true ) ) {
		return htmlspecialchars( $val );
	}

	switch ( $tag ) {
		case 'XResolution':
		case 'YResolution':
			return $this->exifMsg(
				'XYResolution',
				$resolutionUnit === 2 ? 'i' : 'c',
				$this->formatNum( $val )
			);

		// TODO: YCbCrCoefficients  #p27 (see annex E)
		case 'ExifVersion':
		case 'FlashpixVersion':
			return "$val" / 100;

		case 'DateTime':
		case 'DateTimeOriginal':
		case 'DateTimeDigitized':
		case 'DateTimeReleased':
		case 'DateTimeExpires':
		case 'GPSDateStamp':
		case 'dc-date':
		case 'DateTimeMetadata':
			return $this->formatExifDateValue( $val );

		case 'SubjectDistance':
			return $this->exifMsg( $tag, '', $this->formatNum( $val ) );

		case 'Flash':
			$flashDecode = [
				'fired' => $val & 0b00000001,
				'return' => ( $val & 0b00000110 ) >> 1,
				'mode' => ( $val & 0b00011000 ) >> 3,
				'function' => ( $val & 0b00100000 ) >> 5,
				'redeye' => ( $val & 0b01000000 ) >> 6,
				// 'reserved' => ( $val & 0b10000000 ) >> 7,
			];
			$flashMsgs = [];
			# We do not need to handle unknown values since all are used.
			foreach ( $flashDecode as $subTag => $subValue ) {
				# We do not need any message for zeroed values.
				if ( $subTag !== 'fired' && $subValue == 0 ) {
					continue;
				}
				$flashMsgs[] = $this->exifMsg( $tag . '-' . $subTag, $subValue );
			}

			return $this->getLanguage()->commaList( $flashMsgs );

		case 'GPSAltitude':
			if ( $val < 0 ) {
				return $this->exifMsg( 'GPSAltitude', 'below-sealevel', $this->formatNum( -$val, 3 ) );
			}

			return $this->exifMsg( 'GPSAltitude', 'above-sealevel', $this->formatNum( $val, 3 ) );

		case 'GPSLatitude':
		case 'GPSDestLatitude':
			return $this->formatCoords( $val, 'latitude' );

		case 'GPSLongitude':
		case 'GPSDestLongitude':
			return $this->formatCoords( $val, 'longitude' );

		case 'GPSDOP':
			// See https://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
			if ( $val <= 2 ) {
				$quality = 'excellent';
			} elseif ( $val <= 5 ) {
				$quality = 'good';
			} elseif ( $val <= 10 ) {
				$quality = 'moderate';
			} elseif ( $val <= 20 ) {
				$quality = 'fair';
			} else {
				$quality = 'poor';
			}

			return $this->exifMsg( $tag, $quality, $this->formatNum( $val ) );

		// This is not in the Exif standard, just a special
		// case for our purposes which enables wikis to wikify
		// the make, model and software name to link to their articles.
		case 'Make':
		case 'Model':
			return $this->exifMsg( $tag, '', $val );

		case 'Software':
			if ( is_array( $val ) ) {
				// if its a software, version array.
				return $this->msg( 'exif-software-version-value', $val[0], $val[1] )->text();
			}

			return $this->exifMsg( $tag, '', $val );

		case 'ExposureTime':
			// Show the pretty fraction as well as decimal version
			return $this->msg( 'exif-exposuretime-format',
				$this->formatFraction( $val ), $this->formatNum( $val ) )->text();

		case 'ISOSpeedRatings':
			// If its = 65535 that means its at the
			// limit of the size of Exif::short and
			// is really higher.
			if ( $val == '65535' ) {
				return $this->exifMsg( $tag, 'overflow' );
			}

			return $this->formatNum( $val );

		case 'FNumber':
			return $this->msg( 'exif-fnumber-format',
				$this->formatNum( $val ) )->text();

		case 'FocalLength':
		case 'FocalLengthIn35mmFilm':
			return $this->msg( 'exif-focallength-format',
				$this->formatNum( $val ) )->text();

		case 'MaxApertureValue':
			if ( strpos( $val, '/' ) !== false ) {
				// need to expand this earlier to calculate fNumber
				list( $n, $d ) = explode( '/', $val );
				// A zero denominator is left unexpanded rather than divided by.
				if ( is_numeric( $n ) && is_numeric( $d ) && $d != 0 ) {
					$val = $n / $d;
				}
			}
			if ( is_numeric( $val ) ) {
				$fNumber = pow( 2, $val / 2 );

				return $this->msg( 'exif-maxaperturevalue-value',
					$this->formatNum( $val ),
					$this->formatNum( $fNumber, 2 )
				)->text();
			}

			return $val;

		case 'iimCategory':
			// See pg 29 of IPTC photo metadata standard.
			$knownCategories = [
				'ace', 'clj', 'dis', 'fin', 'edu', 'evn', 'hth', 'hum', 'lab',
				'lif', 'pol', 'rel', 'sci', 'soi', 'spo', 'war', 'wea',
			];
			if ( in_array( strtolower( $val ), $knownCategories, true ) ) {
				return $this->exifMsg( 'iimcategory', $val );
			}

			return $val;

		case 'SubjectNewsCode':
			// Essentially like iimCategory.
			// 8 (numeric) digit hierarchical
			// classification. We decode the
			// first 2 digits, which provide
			// a broad category.
			return $this->convertNewsCode( $val );

		case 'Urgency':
			// 1-8 with 1 being highest, 5 normal
			// 0 is reserved, and 9 is 'user-defined'.
			$urgency = '';
			if ( $val == 0 || $val == 9 ) {
				$urgency = 'other';
			} elseif ( $val < 5 && $val >= 1 ) {
				// Includes 1, the highest urgency.
				$urgency = 'high';
			} elseif ( $val == 5 ) {
				$urgency = 'normal';
			} elseif ( $val <= 8 && $val > 5 ) {
				$urgency = 'low';
			}

			if ( $urgency !== '' ) {
				return $this->exifMsg( 'urgency', $urgency, $val );
			}

			return $val;

		// Things that have a unit of pixels.
		case 'OriginalImageHeight':
		case 'OriginalImageWidth':
		case 'PixelXDimension':
		case 'PixelYDimension':
		case 'ImageWidth':
		case 'ImageLength':
			return $this->formatNum( $val ) . ' ' . $this->msg( 'unit-pixel' )->text();

		case 'Rating':
			if ( $val == '-1' ) {
				return $this->exifMsg( $tag, 'rejected' );
			}

			return $this->formatNum( $val );

		case 'LanguageCode':
			$lang = Language::fetchLanguageName( strtolower( $val ), $this->getLanguage()->getCode() );

			return htmlspecialchars( $lang ? $lang : $val );

		default:
			return $this->formatNum( $val );
	}
}
985
/**
 * Flatten an array, using the content language for any messages.
 *
 * Works like flattenArrayReal(), but on a throw-away formatter whose
 * context has its language overridden with the wiki's content language.
 *
 * @param array $vals Array of values
 * @param string $type Type of array (either lang, ul, ol).
 *   lang = language assoc array with keys being the lang code
 *   ul = unordered list, ol = ordered list
 *   type can also come from the '_type' member of $vals.
 * @param bool $noHtml If to avoid returning anything resembling HTML.
 *   (Ugly hack for backwards compatibility with old MediaWiki).
 * @param bool|IContextSource $context Base context to derive from (optional)
 * @return string Single value (in wiki-syntax).
 * @since 1.23
 */
public static function flattenArrayContentLang( $vals, $type = 'ul',
	$noHtml = false, $context = false
) {
	global $wgContLang;

	$formatter = new self;
	if ( $context ) {
		$formatter->setContext( $context );
	}

	// Derive a context so that the language override does not leak into
	// the context that was handed in.
	$contentLangContext = new DerivativeContext( $formatter->getContext() );
	$contentLangContext->setLanguage( $wgContLang );
	$formatter->setContext( $contentLangContext );

	return $formatter->flattenArrayReal( $vals, $type, $noHtml );
}
1014
/**
 * A function to collapse multivalued tags into a single value.
 * This turns an array of (for example) authors into a bulleted list.
 *
 * This is public on the basis it might be useful outside of this class.
 *
 * Non-array input is returned untouched. A one-element array of a type
 * other than 'lang' is returned as the element at key 0, and an empty
 * array yields an empty string.
 *
 * In single-language mode (see setSingleLanguage()) a 'lang' array is
 * reduced to one item: the first priority language present, else the
 * first remaining language that differs from the default, else the
 * default item.
 *
 * @param array $vals Array of values
 * @param string $type Type of array (either lang, ul, ol).
 *   lang = language assoc array with keys being the lang code
 *   ul = unordered list, ol = ordered list
 *   type can also come from the '_type' member of $vals.
 * @param bool $noHtml If to avoid returning anything resembling HTML.
 *   (Ugly hack for backwards compatibility with old mediawiki).
 * @return string Single value (in wiki-syntax).
 * @since 1.23
 */
public function flattenArrayReal( $vals, $type = 'ul', $noHtml = false ) {
	if ( !is_array( $vals ) ) {
		return $vals; // do nothing if not an array;
	}

	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	}

	if ( count( $vals ) === 1 && $type !== 'lang' ) {
		return $vals[0];
	}

	if ( count( $vals ) === 0 ) {
		wfDebug( __METHOD__ . " metadata array with 0 elements!\n" );

		return ""; // paranoia. This should never happen
	}

	/* @todo FIXME: This should hide some of the list entries if there are
	 * say more than four. Especially if a field is translated into 20
	 * languages, we don't want to show them all by default
	 */
	switch ( $type ) {
		case 'lang':
			// Display default, followed by ContLang,
			// followed by the rest in no particular
			// order.

			// Todo: hide some items if really long list.

			$content = '';

			$priorityLanguages = $this->getPriorityLanguages();
			$defaultItem = false;
			$defaultLang = false;

			// If default is set, save it for later,
			// as we don't know if it's equal to
			// one of the lang codes. (In xmp
			// you specify the language for a
			// default property by having both
			// a default prop, and one in the language
			// that are identical)
			if ( isset( $vals['x-default'] ) ) {
				$defaultItem = $vals['x-default'];
				unset( $vals['x-default'] );
			}

			foreach ( $priorityLanguages as $pLang ) {
				if ( !isset( $vals[$pLang] ) ) {
					continue;
				}

				// Read the item before it is removed from $vals below.
				$item = $vals[$pLang];

				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $pLang ], $item );
				}

				$isDefault = false;
				if ( $item === $defaultItem ) {
					$defaultItem = false;
					$isDefault = true;
				}
				$content .= $this->langItem( $item, $pLang, $isDefault, $noHtml );

				unset( $vals[$pLang] );
			}

			// Now do the rest.
			foreach ( $vals as $lang => $item ) {
				if ( $item === $defaultItem ) {
					// Shown later as the default entry, labelled with this language.
					$defaultLang = $lang;
					continue;
				}
				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						[ 'lang' => $lang ], $item );
				}
				$content .= $this->langItem( $item, $lang, false, $noHtml );
			}

			if ( $defaultItem !== false ) {
				if ( $this->singleLang ) {
					return $defaultItem;
				}
				// The default entry always goes first.
				$content = $this->langItem( $defaultItem,
					$defaultLang, true, $noHtml ) .
					$content;
			}

			if ( $noHtml ) {
				return $content;
			}

			return '<ul class="metadata-langlist">' .
				$content .
				'</ul>';

		case 'ol':
			if ( $noHtml ) {
				return "\n#" . implode( "\n#", $vals );
			}

			return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';

		case 'ul':
		default:
			if ( $noHtml ) {
				return "\n*" . implode( "\n*", $vals );
			}

			return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
	}
}
1143
/**
 * Build a single entry for a list of translations.
 *
 * With $lang === false the entry is rendered as the language-less
 * default item; otherwise it is labelled with the language's name,
 * falling back to the name of the base language (en-US -> en) and
 * finally to the raw code.
 *
 * @param string $value Value (this is not escaped)
 * @param string $lang Lang code of item or false
 * @param bool $default If it is default value.
 * @param bool $noHtml If to avoid html (for back-compat)
 * @throws MWException If both $lang and $default are false
 * @return string Language item (Note: despite how this looks, this is
 *   treated as wikitext, not as HTML).
 */
private function langItem( $value, $lang, $default = false, $noHtml = false ) {
	if ( $lang === false && $default === false ) {
		throw new MWException( '$lang and $default cannot both be false.' );
	}

	$wrappedValue = $noHtml
		? $value
		: '<span class="mw-metadata-lang-value">' . $value . '</span>';

	// Default item without any language attached.
	if ( $lang === false ) {
		$text = $this->msg( 'metadata-langitem-default', $wrappedValue )->text();

		return $noHtml
			? $text . "\n\n"
			: '<li class="mw-metadata-lang-default">' . $text . "</li>\n";
	}

	// From here on we have a language specified.
	$lowLang = strtolower( $lang );
	$langName = Language::fetchLanguageName( $lowLang );
	if ( $langName === '' ) {
		// try just the base language name. (aka en-US -> en ).
		$langName = Language::fetchLanguageName( explode( '-', $lowLang, 2 )[0] );
	}
	if ( $langName === '' ) {
		// give up.
		$langName = $lang;
	}

	$text = $this->msg( 'metadata-langitem', $wrappedValue, $langName, $lang )->text();
	if ( $noHtml ) {
		return '*' . $text;
	}

	$classes = 'mw-metadata-lang-code-' . $lang;
	if ( $default ) {
		$classes .= ' mw-metadata-lang-default';
	}

	return '<li class="' . $classes . '" lang="' . $lang . '">' . $text . "</li>\n";
}
1207
/**
 * Convenience function for getFormattedData()
 *
 * Looks up the message whose key is "exif-$tag-$val", lower-cased with
 * the content language. An empty-string $val selects the generic
 * "exif-$tag-value" message instead.
 *
 * @param string $tag The tag name to pass on
 * @param string $val The value of the tag
 * @param string $arg An argument to pass ($1)
 * @param string $arg2 A 2nd argument to pass ($2)
 * @return string The text content of the message
 */
private function exifMsg( $tag, $val, $arg = null, $arg2 = null ) {
	global $wgContLang;

	$suffix = ( $val === '' ) ? 'value' : $val;
	$messageKey = $wgContLang->lc( "exif-$tag-$suffix" );

	return $this->msg( $messageKey, $arg, $arg2 )->text();
}
1226
/**
 * Format a number, convert numbers from fractions into floating point
 * numbers, joins arrays of numbers with commas.
 *
 * Strings of the form "n/d" are divided out first; a fraction with a
 * zero denominator is passed to the language formatter undivided.
 * Rounding applies to every element when an array is given.
 *
 * @param mixed $num The value to format
 * @param float|int|bool $round Digits to round to or false.
 * @return mixed A floating point number or whatever we were fed
 */
private function formatNum( $num, $round = false ) {
	if ( is_array( $num ) ) {
		$out = [];
		foreach ( $num as $number ) {
			// Pass the precision on so that lists are rounded like scalars.
			$out[] = $this->formatNum( $number, $round );
		}

		return $this->getLanguage()->commaList( $out );
	}

	$m = [];
	if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
		if ( $m[2] != 0 ) {
			$newNum = $m[1] / $m[2];
			if ( $round !== false ) {
				$newNum = round( $newNum, $round );
			}
		} else {
			// Cannot divide by zero; format the fraction string as given.
			$newNum = $num;
		}

		return $this->getLanguage()->formatNum( $newNum );
	}

	if ( is_numeric( $num ) && $round !== false ) {
		$num = round( $num, $round );
	}

	return $this->getLanguage()->formatNum( $num );
}
1264
/**
 * Format a rational number given as "a/b", reducing the fraction to
 * lowest terms. Anything that is not of that form (or whose parts are
 * both zero) is handed to formatNum() unchanged.
 *
 * @param mixed $num The value to format
 * @return mixed The reduced fraction as "n/d", or the result of formatNum()
 */
private function formatFraction( $num ) {
	$parts = [];
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		return $this->formatNum( $num );
	}

	$top = intval( $parts[1] );
	$bottom = intval( $parts[2] );
	$divisor = $this->gcd( abs( $top ), $bottom );
	if ( $divisor == 0 ) {
		// Only happens when numerator and denominator are both zero.
		return $this->formatNum( $num );
	}

	return $this->formatNum( $top / $divisor ) . '/' . $this->formatNum( $bottom / $divisor );
}
1285
/**
 * Calculate the greatest common divisor of two integers using the
 * iterative form of the Euclidean algorithm.
 *
 * @see https://en.wikipedia.org/wiki/Euclidean_algorithm
 * @param int $a Numerator
 * @param int $b Denominator
 * @return int The last non-zero value of the sequence; $a itself when $b is 0
 */
private function gcd( $a, $b ) {
	while ( $b != 0 ) {
		// One Euclid step: ( a, b ) becomes ( b, a mod b ).
		list( $a, $b ) = [ $b, $a % $b ];
	}

	return $a;
}
1312
/**
 * Fetch the human readable version of a news code.
 *
 * A news code is an 8 digit code whose first two digits are a general
 * classification; only that classification is translated. Leading
 * zeros are significant, so the code is handled as a string.
 *
 * @param string $val The 8 digit news code.
 * @return string The human readable form, or $val unchanged when it is
 *  not an 8 digit code or its classification is unknown
 */
private function convertNewsCode( $val ) {
	if ( !preg_match( '/^\d{8}$/D', $val ) ) {
		// Not a valid news code.
		return $val;
	}

	// Two-digit classification prefix => suffix of the iimcategory message.
	$categories = [
		'01' => 'ace',
		'02' => 'clj',
		'03' => 'dis',
		'04' => 'fin',
		'05' => 'edu',
		'06' => 'evn',
		'07' => 'hth',
		'08' => 'hum',
		'09' => 'lab',
		'10' => 'lif',
		'11' => 'pol',
		'12' => 'rel',
		'13' => 'sci',
		'14' => 'soi',
		'15' => 'spo',
		'16' => 'war',
		'17' => 'wea',
	];

	$prefix = substr( $val, 0, 2 );
	if ( !isset( $categories[$prefix] ) ) {
		return $val;
	}

	$catMsg = $this->exifMsg( 'iimcategory', $categories[$prefix] );

	return $this->exifMsg( 'subjectnewscode', '', $val, $catMsg );
}
1391
/**
 * Format a coordinate given in decimal degrees as degrees, minutes and
 * seconds plus a hemisphere letter.
 *
 * @param int|float $coord The coordinate in decimal degrees; negative
 *  values are south (latitude) or west (longitude)
 * @param string $type 'latitude' or 'longitude'; any other value leaves
 *  the hemisphere letter empty
 * @return string The text of the 'exif-coordinate-format' message
 */
private function formatCoords( $coord, $type ) {
	$isNegative = $coord < 0;
	$nCoord = $isNegative ? -$coord : $coord;

	if ( $type === 'latitude' ) {
		$ref = $isNegative ? 'S' : 'N';
	} elseif ( $type === 'longitude' ) {
		$ref = $isNegative ? 'W' : 'E';
	} else {
		$ref = '';
	}

	// Split the absolute value into whole degrees, whole minutes and
	// seconds rounded to two decimals.
	$wholeDeg = floor( $nCoord );
	$fraction = $nCoord - $wholeDeg;
	$wholeMin = floor( $fraction * 60.0 );
	$seconds = round( ( $fraction - $wholeMin / 60 ) * 3600, 2 );

	return $this->msg(
		'exif-coordinate-format',
		$this->formatNum( $wholeDeg ),
		$this->formatNum( $wholeMin ),
		$this->formatNum( $seconds ),
		$ref,
		$coord
	)->text();
}
1428
/**
 * Format the contact info field into a single value.
 *
 * This function might be called from
 * JpegHandler::convertMetadataVersion which is why it is
 * public.
 *
 * @param array $vals Array with fields of the ContactInfo
 *  struct defined in the IPTC4XMP spec. Or potentially
 *  an array with one element that is a free form text
 *  value from the older iptc iim 1:118 prop.
 * @return string HTML-ish looking wikitext
 * @since 1.23 no longer static
 */
public function collapseContactInfo( $vals ) {
	$structKeys = [
		'CiAdrExtadr',
		'CiAdrCity',
		'CiAdrCtry',
		'CiEmailWork',
		'CiTelWork',
		'CiAdrPcode',
		'CiAdrRegion',
		'CiUrlWork',
	];
	$hasStructField = false;
	foreach ( $structKeys as $structKey ) {
		if ( isset( $vals[$structKey] ) ) {
			$hasStructField = true;
			break;
		}
	}

	if ( !$hasStructField ) {
		// No sub-properties at all. This can happen with old IPTC data
		// where the contact info is just free-form text. The values are
		// escaped partly for consistency and partly because people often
		// put ">" etc. into the metadata which must not be interpreted,
		// while URLs should still be auto-linked.
		foreach ( $vals as $idx => $freeText ) {
			$vals[$idx] = htmlspecialchars( $freeText );
		}

		return $this->flattenArrayReal( $vals );
	}

	// We have a real ContactInfo struct. It is unclear whether all of
	// its fields have to be set, so every one of them is optional here.
	//
	// Some of the class names are similar to those used by hCard, mostly
	// because they are sensible names. This does not (and does not
	// attempt to) output the hCard microformat, though it might output
	// the adr microformat.
	$spanClasses = [
		// Todo: the extended address can potentially be multi-line.
		// Need to check how that works in XMP.
		'CiAdrExtadr' => 'extended-address',
		'CiAdrCity' => 'locality',
		'CiAdrCtry' => 'country-name',
		'CiTelWork' => 'tel',
		'CiAdrPcode' => 'postal-code',
		// Region is the province/state.
		'CiAdrRegion' => 'region',
		'CiUrlWork' => 'url',
	];
	$spans = [];
	foreach ( $spanClasses as $field => $cssClass ) {
		if ( isset( $vals[$field] ) ) {
			$spans[$field] = '<span class="' . $cssClass . '">'
				. htmlspecialchars( $vals[$field] )
				. '</span>';
		} else {
			$spans[$field] = '';
		}
	}

	$email = '';
	if ( isset( $vals['CiEmailWork'] ) ) {
		$links = [];
		// Multiple addresses may be separated by new lines and/or commas.
		foreach ( explode( "\n", $vals['CiEmailWork'] ) as $line ) {
			foreach ( explode( ',', $line ) as $piece ) {
				$address = trim( $piece );
				if ( $address === ',' || $address === '' ) {
					continue;
				}
				if ( strpos( $address, '<' ) !== false ) {
					// Don't do fancy formatting to
					// "My name" <foo@bar.com> style stuff
					$links[] = $address;
				} else {
					$links[] = '[mailto:' . $address
						. ' <span class="email">' . $address . '</span>]';
				}
			}
		}
		$email = implode( ', ', $links );
	}

	return $this->msg(
		'exif-contact-value',
		$email,
		$spans['CiUrlWork'],
		$spans['CiAdrExtadr'],
		$spans['CiAdrCity'],
		$spans['CiAdrRegion'],
		$spans['CiAdrPcode'],
		$spans['CiAdrCtry'],
		$spans['CiTelWork']
	)->text();
}
1554
/**
 * Get a list of fields that are visible by default.
 *
 * The list is read from the 'metadata-fields' message in the content
 * language: every line starting with "*" contributes one field name,
 * trimmed of surrounding whitespace and converted to lower case.
 *
 * @return array Lower-cased field names
 * @since 1.23
 */
public static function getVisibleFields() {
	$rawList = wfMessage( 'metadata-fields' )->inContentLanguage()->text();

	$fields = [];
	foreach ( explode( "\n", $rawList ) as $line ) {
		$matches = [];
		if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
			$fields[] = strtolower( $matches[1] );
		}
	}

	return $fields;
}
1574
/**
 * Get an array of extended metadata. (See the imageinfo API for format.)
 *
 * Results are cached per language code, single-language flag and file
 * SHA-1. A cached entry is only used if the
 * 'ValidateExtendedMetadataCache' hook accepts it.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
public function fetchExtendedMetadata( File $file ) {
	$wanCache = ObjectCache::getMainWANInstance();

	// If revision deleted, exit immediately
	if ( $file->isDeleted( File::DELETED_FILE ) ) {
		return [];
	}

	$key = wfMemcKey(
		'getExtendedMetadata',
		$this->getLanguage()->getCode(),
		(int)$this->singleLang,
		$file->getSha1()
	);

	$hit = $wanCache->get( $key );
	$hitIsUsable = $hit
		&& Hooks::run( 'ValidateExtendedMetadataCache', [ $hit['timestamp'], $file ] );
	if ( $hitIsUsable ) {
		return $hit['data'];
	}

	// Cache miss (or rejected entry): recompute. Foreign API files are
	// cached for 12 hours, everything else for 30 days; hook handlers
	// may override this value.
	if ( $file instanceof ForeignAPIFile ) {
		$maxCacheTime = 60 * 60 * 12;
	} else {
		$maxCacheTime = 60 * 60 * 24 * 30;
	}

	$fileMetadata = $this->getExtendedMetadataFromFile( $file );
	$extendedMetadata = $this->getExtendedMetadataFromHook( $file, $fileMetadata, $maxCacheTime );
	if ( $this->singleLang ) {
		$this->resolveMultilangMetadata( $extendedMetadata );
	}
	$this->discardMultipleValues( $extendedMetadata );

	// Make sure the metadata won't break the API when an XML format is used.
	// This is an API-specific step, so calling it from outside this method
	// would be cleaner, but doing it here means a cache hit does not have
	// to redo the computation.
	$this->sanitizeArrayForAPI( $extendedMetadata );

	$wanCache->set(
		$key,
		[ 'data' => $extendedMetadata, 'timestamp' => wfTimestampNow() ],
		$maxCacheTime
	);

	return $extendedMetadata;
}
1622
/**
 * Get file-based metadata in standardized format.
 *
 * Note that for a remote file, this might return metadata supplied by extensions.
 *
 * For local files the result contains 'DateTime' (the file timestamp in
 * ISO 8601 form) and, when the file has a title, 'ObjectName' (the
 * title text without its last ".extension" part).
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromFile( File $file ) {
	// A remote file accessed via an API request already comes with
	// remote metadata, so any local one is ignored.
	if ( $file instanceof ForeignAPIFile ) {
		// In case of error we pretend no metadata - this will get cached.
		// Might or might not be a good idea.
		$remote = $file->getExtendedMetadata();

		return $remote ? $remote : [];
	}

	$fileMetadata = [];

	// This is modification time, which is close to "upload" time.
	$fileMetadata['DateTime'] = [
		'value' => wfTimestamp( TS_ISO_8601, $file->getTimestamp() ),
		'source' => 'mediawiki-metadata',
	];

	$title = $file->getTitle();
	if ( $title ) {
		$text = $title->getText();
		$dot = strrpos( $text, '.' );

		// A missing dot, or a dot at the very start, leaves the text as is.
		$fileMetadata['ObjectName'] = [
			'value' => $dot ? substr( $text, 0, $dot ) : $text,
			'source' => 'mediawiki-metadata',
		];
	}

	return $fileMetadata;
}
1670
/**
 * Get additional metadata from hooks in standardized format.
 *
 * Runs the 'GetExtendedMetadata' hook and then marks every field that
 * is not in the list of visible fields with an empty 'hidden' entry.
 *
 * @param File $file File to use
 * @param array $extendedMetadata
 * @param int $maxCacheTime Hook handlers might use this parameter to override cache time
 *
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromHook( File $file, array $extendedMetadata,
	&$maxCacheTime
) {
	// Metadata and cache time are handed over by reference so that
	// handlers can change them.
	$hookArgs = [
		&$extendedMetadata,
		$file,
		$this->getContext(),
		$this->singleLang,
		&$maxCacheTime
	];
	Hooks::run( 'GetExtendedMetadata', $hookArgs );

	$visible = array_flip( self::getVisibleFields() );
	foreach ( $extendedMetadata as $key => $unused ) {
		if ( array_key_exists( strtolower( $key ), $visible ) ) {
			continue;
		}
		$extendedMetadata[$key]['hidden'] = '';
	}

	return $extendedMetadata;
}
1701
/**
 * Turns an XMP-style multilang array into a single value.
 * If the value is not a multilang array, it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * Lookup order: the priority languages, then 'x-default', then
 * whichever language comes first in the array.
 *
 * @param mixed $value
 * @return mixed Value in best language, null if there were no languages at all
 * @since 1.23
 */
protected function resolveMultilangValue( $value ) {
	$isMultilang = is_array( $value )
		&& isset( $value['_type'] )
		&& $value['_type'] == 'lang';
	if ( !$isMultilang ) {
		// do nothing if not a multilang array
		return $value;
	}

	// choose the language best matching user or site settings
	foreach ( $this->getPriorityLanguages() as $lang ) {
		if ( isset( $value[$lang] ) ) {
			return $value[$lang];
		}
	}

	// otherwise go with the default language, if set
	if ( isset( $value['x-default'] ) ) {
		return $value['x-default'];
	}

	// otherwise just return any one language; an array with nothing
	// but the type marker should not happen and is signalled with null
	unset( $value['_type'] );

	return $value ? reset( $value ) : null;
}
1741
/**
 * Turns an XMP-style multivalue array into a single value by dropping all but the first
 * value. If the value is not a multivalue array (or a multivalue array inside a multilang
 * array), it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * A multilang array (_type 'lang') is processed entry by entry, so each
 * language keeps its own first value.
 *
 * @param mixed $value
 * @return mixed The value, or the first value if there were multiple ones. An empty
 *  array yields null; an array holding nothing but a leading '_type' yields false.
 * @since 1.25
 */
protected function resolveMultivalueValue( $value ) {
	if ( !is_array( $value ) ) {
		return $value;
	}

	if ( isset( $value['_type'] ) && $value['_type'] === 'lang' ) {
		// if this is a multilang array, process fields separately
		$newValue = [];
		foreach ( $value as $k => $v ) {
			$newValue[$k] = $this->resolveMultivalueValue( $v );
		}

		return $newValue;
	}

	// _type is 'ul' or 'ol' or missing in which case it defaults to 'ul'
	if ( !$value ) {
		// Nothing to pick from.
		return null;
	}

	// each() is deprecated since PHP 7.2 and removed in PHP 8.0, so the
	// array pointer is walked explicitly. reset() also makes the result
	// independent of where the pointer happened to be beforehand.
	$first = reset( $value );
	if ( key( $value ) === '_type' ) {
		// Skip the type marker; the real first value follows it.
		$first = next( $value );
	}

	return $first;
}
1769
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and resolves multi-language values in it. The array is modified
 * in place; anything that is not an array is left alone.
 *
 * @param array &$metadata
 * @since 1.23
 */
protected function resolveMultilangMetadata( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	foreach ( array_keys( $metadata ) as $name ) {
		// Fields without a 'value' entry are skipped.
		if ( isset( $metadata[$name]['value'] ) ) {
			$metadata[$name]['value'] =
				$this->resolveMultilangValue( $metadata[$name]['value'] );
		}
	}
}
1786
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and turns all fields into single-valued ones by dropping extra values.
 * The array is modified in place; anything that is not an array is
 * left alone.
 *
 * @param array &$metadata
 * @since 1.25
 */
protected function discardMultipleValues( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	// Fields with composite values are left untouched. They are not
	// particularly interesting and you can get them via the
	// metadata / commonmetadata APIs anyway.
	$compositeFields = [ 'Software', 'Contact' ];

	foreach ( array_keys( $metadata ) as $name ) {
		if ( in_array( $name, $compositeFields, true ) ) {
			continue;
		}
		if ( isset( $metadata[$name]['value'] ) ) {
			$metadata[$name]['value'] =
				$this->resolveMultivalueValue( $metadata[$name]['value'] );
		}
	}
}
1808
/**
 * Makes sure the given array is a valid API response fragment.
 *
 * Every key is run through sanitizeKeyForAPI(); entries whose key
 * changes are re-added under the new key (which moves them to the end
 * of the array). Nested arrays are processed recursively. The array is
 * modified in place; anything that is not an array is left alone.
 *
 * @param array &$arr
 */
protected function sanitizeArrayForAPI( &$arr ) {
	if ( !is_array( $arr ) ) {
		return;
	}

	$counter = 1;
	// Walk a snapshot of the keys instead of iterating by reference:
	// entries get added and removed below, and whether a by-reference
	// foreach visits elements appended during the loop differs between
	// PHP 5 and PHP 7.
	foreach ( array_keys( $arr ) as $key ) {
		// Clean nested arrays first, while the entry is still reachable
		// under its original key, so that a renamed entry is never left
		// unsanitized.
		if ( is_array( $arr[$key] ) ) {
			$this->sanitizeArrayForAPI( $arr[$key] );
		}

		$sanitizedKey = $this->sanitizeKeyForAPI( $key );
		if ( $sanitizedKey === $key ) {
			continue;
		}

		if ( isset( $arr[$sanitizedKey] ) ) {
			// Make the sanitized keys hopefully unique.
			// To make it definitely unique would be too much effort, given that
			// sanitizing is only needed for misformatted metadata anyway, but
			// this at least covers the case when $arr is numeric.
			$sanitizedKey .= $counter;
			++$counter;
		}
		$arr[$sanitizedKey] = $arr[$key];
		unset( $arr[$key] );
	}

	// Handle API metadata keys (particularly "_type")
	$keys = array_filter( array_keys( $arr ), 'ApiResult::isMetadataKey' );
	if ( $keys ) {
		ApiResult::setPreserveKeysList( $arr, $keys );
	}
}
1844
/**
 * Turns a string into a valid API identifier.
 *
 * Only ASCII letters, digits and "_", ":", ".", "-" are kept, and
 * leading digits, "-" and "." are removed. An empty result becomes "_",
 * and the internal keyword "_element" becomes "element".
 *
 * @param string $key
 * @return string
 * @since 1.23
 */
protected function sanitizeKeyForAPI( $key ) {
	// drop all characters which are not valid in an XML tag name
	// a bunch of non-ASCII letters would be valid but probably won't
	// be used so we take the easy way
	// (The upper-case range must be A-Z; "A-z" would also let through
	// "[", "\", "]", "^" and "`".)
	$key = preg_replace( '/[^a-zA-Z0-9_:.\-]/', '', $key );

	// drop characters which are invalid at the first position
	// (The hyphen is escaped because PCRE2, used since PHP 7.3, rejects
	// "[\d-.]" as an invalid range.)
	$key = preg_replace( '/^[\d\-.]+/', '', $key );

	// preg_replace() returns null on error; treat that like an empty key.
	if ( $key === null || $key === '' ) {
		$key = '_';
	}

	// special case for an internal keyword
	if ( $key === '_element' ) {
		$key = 'element';
	}

	return $key;
}
1870
/**
 * Returns a list of languages (first is best) to use when formatting multilang fields,
 * based on user and site preferences.
 *
 * The list starts with the code of the current language, followed by
 * the two lists returned by Language::getFallbacksIncludingSiteLanguage().
 *
 * @return array
 * @since 1.23
 */
protected function getPriorityLanguages() {
	$code = $this->getLanguage()->getCode();
	$fallbackGroups = Language::getFallbacksIncludingSiteLanguage( $code );

	return array_merge(
		(array)$code,
		$fallbackGroups[0],
		$fallbackGroups[1]
	);
}
1888 }