Merge "Add option to expose original sha1 in thumb url"
[lhc/web/wiklou.git] / includes / media / FormatMetadata.php
1 <?php
2 // @codingStandardsIgnoreFile
3 // PHPCS can't handle the level of nesting in this file
4 /**
5 * Formatting of image metadata values into human readable form.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @ingroup Media
23 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
24 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff
25 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
26 * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification
27 * @file
28 */
29
30 /**
31 * Format Image metadata values into a human readable form.
32 *
33 * Note lots of these messages use the prefix 'exif' even though
34 * they may not be exif properties. For example 'exif-ImageDescription'
35 * can be the Exif ImageDescription, or it could be the iptc-iim caption
36 * property, or it could be the xmp dc:description property. This
37 * is because these messages should be independent of how the data is
38 * stored, since the user doesn't care if the description is stored in xmp,
39 * exif, etc only that it's a description. (Additionally many of these properties
40 * are merged together following the MWG standard, such that for example,
41 * exif properties override XMP properties that mean the same thing if
42 * there is a conflict).
43 *
44 * It should perhaps use a prefix like 'metadata' instead, but there
45 * is already a large number of messages using the 'exif' prefix.
46 *
47 * @ingroup Media
48 * @since 1.23 the class extends ContextSource and various formerly-public
49 * internal methods are private
50 */
51 class FormatMetadata extends ContextSource {
52 /**
53 * Only output a single language for multi-language fields
54 * @var bool
55 * @since 1.23
56 */
57 protected $singleLang = false;
58
/**
 * Choose whether multi-language fields are reduced to a single language.
 *
 * The flag is stored in $this->singleLang and is consulted by
 * flattenArrayReal() when it handles arrays of type 'lang'.
 *
 * @param bool $val True to output one language only, false to list them all
 * @since 1.23
 */
public function setSingleLanguage( $val ) {
	$this->singleLang = $val;
}
68
/**
 * Static convenience wrapper around makeFormattedData().
 *
 * Numbers given by Exif user agents are often magical, that is they
 * should be replaced by a detailed explanation depending on their
 * value which most of the time are plain integers. This function
 * formats Exif (and other metadata) values into human readable form.
 *
 * This is the usual entry point for this class: it builds a throw-away
 * formatter, optionally hands it the given context, and delegates.
 *
 * @param array $tags The Exif data to format ( as returned by
 *   Exif::getFilteredData() or BitmapMetadataHandler )
 * @param bool|IContextSource $context Context to use (optional); any
 *   falsy value leaves the formatter's own context untouched
 * @return array
 */
public static function getFormattedData( $tags, $context = false ) {
	$formatter = new self();

	if ( $context ) {
		$formatter->setContext( $context );
	}

	return $formatter->makeFormattedData( $tags );
}
90
/**
 * Numbers given by Exif user agents are often magical, that is they
 * should be replaced by a detailed explanation depending on their
 * value which most of the time are plain integers. This function
 * formats Exif (and other metadata) values into human readable form.
 *
 * For every tag the value is wrapped into an array if needed, each
 * element is formatted by formatTagValue(), and the result is collapsed
 * back into a single value by flattenArrayReal(). 'GPSTimeStamp' and
 * 'Contact' are handled as whole arrays instead.
 *
 * @param array $tags The Exif data to format ( as returned by
 *   Exif::getFilteredData() or BitmapMetadataHandler )
 * @return array The tags with their values replaced by formatted ones
 * @since 1.23
 */
public function makeFormattedData( $tags ) {
	// A missing unit is treated like unit 2; any other value selects unit 3.
	$resolutionunit = !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ? 2 : 3;
	unset( $tags['ResolutionUnit'] );

	foreach ( $tags as $tag => &$vals ) {
		// This seems ugly to wrap non-array's in an array just to unwrap again,
		// especially when most of the time it is not an array
		if ( !is_array( $vals ) ) {
			$vals = array( $vals );
		}

		// _type is a special value to say what array type
		if ( isset( $vals['_type'] ) ) {
			$type = $vals['_type'];
			unset( $vals['_type'] );
		} else {
			$type = 'ul'; // default unordered list.
		}

		// This is done differently as the tag is an array.
		if ( $tag == 'GPSTimeStamp' && count( $vals ) === 3 ) {
			// hour min sec array
			$h = explode( '/', $vals[0] );
			$m = explode( '/', $vals[1] );
			$s = explode( '/', $vals[2] );

			// this should already be validated
			// when loaded from file, but it could
			// come from a foreign repo, so be
			// paranoid.
			if ( !isset( $h[1] )
				|| !isset( $m[1] )
				|| !isset( $s[1] )
				|| $h[1] == 0
				|| $m[1] == 0
				|| $s[1] == 0
			) {
				continue;
			}
			$vals = str_pad( intval( $h[0] / $h[1] ), 2, '0', STR_PAD_LEFT )
				. ':' . str_pad( intval( $m[0] / $m[1] ), 2, '0', STR_PAD_LEFT )
				. ':' . str_pad( intval( $s[0] / $s[1] ), 2, '0', STR_PAD_LEFT );

			try {
				// the 1971:01:01 is just a placeholder, and not shown to user.
				$time = wfTimestamp( TS_MW, '1971:01:01 ' . $vals );
				if ( $time && intval( $time ) > 0 ) {
					$vals = $this->getLanguage()->time( $time );
				}
			} catch ( TimestampException $e ) {
				// This shouldn't happen, but we've seen bad formats
				// such as 4-digit seconds in the wild.
				// leave the padded h:m:s string as-is
			}
			continue;
		}

		// The contact info is a multi-valued field
		// instead of the other props which are single
		// valued (mostly) so handle as a special case.
		if ( $tag === 'Contact' ) {
			$vals = $this->collapseContactInfo( $vals );
			continue;
		}

		foreach ( $vals as &$val ) {
			$val = $this->formatTagValue( $tag, $val, $resolutionunit );
		}
		// Break the reference so later writes to $val cannot touch the last element.
		unset( $val );

		// End formatting values, start flattening arrays.
		$vals = $this->flattenArrayReal( $vals, $type );
	}
	// Break the reference into $tags before handing the array back.
	unset( $vals );

	return $tags;
}

/**
 * Tags whose values are a fixed set of codes, each with its own message.
 *
 * @return array Map of tag name => array( message tag, list of known codes )
 */
private function getEnumeratedTagValues() {
	static $table = null;

	if ( $table === null ) {
		$zeroToTwo = array( 0, 1, 2 );
		$zeroToThree = array( 0, 1, 2, 3 );
		$oneToEight = array( 1, 2, 3, 4, 5, 6, 7, 8 );
		$distanceUnits = array( 'K', 'M', 'N' );

		$table = array(
			'Compression' => array( 'Compression',
				array( 1, 2, 3, 4, 5, 6, 7, 8, 32773, 32946, 34712 ) ),
			'PhotometricInterpretation' => array( 'PhotometricInterpretation', array( 2, 6 ) ),
			'Orientation' => array( 'Orientation', $oneToEight ),
			'PlanarConfiguration' => array( 'PlanarConfiguration', array( 1, 2 ) ),
			// TODO: YCbCrSubSampling
			'YCbCrPositioning' => array( 'YCbCrPositioning', array( 1, 2 ) ),
			// TODO: YCbCrCoefficients #p27 (see annex E)
			'ColorSpace' => array( 'ColorSpace', array( 1, 65535 ) ),
			'ComponentsConfiguration' => array( 'ComponentsConfiguration',
				array( 0, 1, 2, 3, 4, 5, 6 ) ),
			'ExposureProgram' => array( 'ExposureProgram',
				array( 0, 1, 2, 3, 4, 5, 6, 7, 8 ) ),
			'MeteringMode' => array( 'MeteringMode',
				array( 0, 1, 2, 3, 4, 5, 6, 7, 255 ) ),
			'LightSource' => array( 'LightSource',
				array( 0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15,
					17, 18, 19, 20, 21, 22, 23, 24, 255 ) ),
			'FocalPlaneResolutionUnit' => array( 'FocalPlaneResolutionUnit', array( 2 ) ),
			'SensingMethod' => array( 'SensingMethod', array( 1, 2, 3, 4, 5, 7, 8 ) ),
			'FileSource' => array( 'FileSource', array( 3 ) ),
			'SceneType' => array( 'SceneType', array( 1 ) ),
			'CustomRendered' => array( 'CustomRendered', array( 0, 1 ) ),
			'ExposureMode' => array( 'ExposureMode', $zeroToTwo ),
			'WhiteBalance' => array( 'WhiteBalance', array( 0, 1 ) ),
			'SceneCaptureType' => array( 'SceneCaptureType', $zeroToThree ),
			'GainControl' => array( 'GainControl', array( 0, 1, 2, 3, 4 ) ),
			'Contrast' => array( 'Contrast', $zeroToTwo ),
			'Saturation' => array( 'Saturation', $zeroToTwo ),
			'Sharpness' => array( 'Sharpness', $zeroToTwo ),
			'SubjectDistanceRange' => array( 'SubjectDistanceRange', $zeroToThree ),
			// The GPS...Ref values are kept for compatibility, probably won't be reached.
			'GPSLatitudeRef' => array( 'GPSLatitude', array( 'N', 'S' ) ),
			'GPSDestLatitudeRef' => array( 'GPSLatitude', array( 'N', 'S' ) ),
			'GPSLongitudeRef' => array( 'GPSLongitude', array( 'E', 'W' ) ),
			'GPSDestLongitudeRef' => array( 'GPSLongitude', array( 'E', 'W' ) ),
			'GPSStatus' => array( 'GPSStatus', array( 'A', 'V' ) ),
			'GPSMeasureMode' => array( 'GPSMeasureMode', array( 2, 3 ) ),
			'GPSTrackRef' => array( 'GPSDirection', array( 'T', 'M' ) ),
			'GPSImgDirectionRef' => array( 'GPSDirection', array( 'T', 'M' ) ),
			'GPSDestBearingRef' => array( 'GPSDirection', array( 'T', 'M' ) ),
			'GPSSpeedRef' => array( 'GPSSpeed', $distanceUnits ),
			'GPSDestDistanceRef' => array( 'GPSDestDistance', $distanceUnits ),
			'Copyrighted' => array( 'Copyrighted', array( 'True', 'False' ) ),
		);
	}

	return $table;
}

/**
 * Format one single value of the given tag into human readable form.
 *
 * @param string $tag Tag name the value belongs to
 * @param mixed $val Raw value
 * @param int $resolutionunit Resolution unit (2 or 3) used for X/YResolution
 * @return mixed Formatted value, or $val unchanged if it is not recognized
 */
private function formatTagValue( $tag, $val, $resolutionunit ) {
	$enumerated = $this->getEnumeratedTagValues();
	if ( isset( $enumerated[$tag] ) ) {
		list( $msgTag, $knownCodes ) = $enumerated[$tag];
		// Non-strict on purpose: values may arrive as strings ("1") or ints (1).
		if ( in_array( $val, $knownCodes ) ) {
			return $this->exifMsg( $msgTag, $val );
		}

		/* If not recognized, display as is. */
		return $val;
	}

	switch ( $tag ) {
		case 'XResolution':
		case 'YResolution':
			if ( $resolutionunit == 2 ) {
				return $this->exifMsg( 'XYResolution', 'i', $this->formatNum( $val ) );
			}
			if ( $resolutionunit == 3 ) {
				return $this->exifMsg( 'XYResolution', 'c', $this->formatNum( $val ) );
			}

			/* If not recognized, display as is. */
			return $val;

		case 'ExifVersion':
		case 'FlashpixVersion':
			// e.g. "0220" becomes 2.2. Non-numeric input is displayed as is,
			// since arithmetic on it warns (or throws on newer PHP versions).
			if ( is_numeric( $val ) ) {
				return $val / 100;
			}

			return $val;

		case 'DateTime':
		case 'DateTimeOriginal':
		case 'DateTimeDigitized':
		case 'DateTimeReleased':
		case 'DateTimeExpires':
		case 'GPSDateStamp':
		case 'dc-date':
		case 'DateTimeMetadata':
			return $this->formatDateTimeValue( $val );

		case 'SubjectDistance':
			return $this->exifMsg( $tag, '', $this->formatNum( $val ) );

		case 'Flash':
			return $this->formatFlashValue( $tag, $val );

		case 'GPSAltitude':
			if ( $val < 0 ) {
				return $this->exifMsg( 'GPSAltitude', 'below-sealevel', $this->formatNum( -$val, 3 ) );
			}

			return $this->exifMsg( 'GPSAltitude', 'above-sealevel', $this->formatNum( $val, 3 ) );

		case 'GPSLatitude':
		case 'GPSDestLatitude':
			return $this->formatCoords( $val, 'latitude' );

		case 'GPSLongitude':
		case 'GPSDestLongitude':
			return $this->formatCoords( $val, 'longitude' );

		case 'GPSDOP':
			// See https://en.wikipedia.org/wiki/Dilution_of_precision_(GPS)
			if ( $val <= 2 ) {
				$quality = 'excellent';
			} elseif ( $val <= 5 ) {
				$quality = 'good';
			} elseif ( $val <= 10 ) {
				$quality = 'moderate';
			} elseif ( $val <= 20 ) {
				$quality = 'fair';
			} else {
				$quality = 'poor';
			}

			return $this->exifMsg( $tag, $quality, $this->formatNum( $val ) );

		// This is not in the Exif standard, just a special
		// case for our purposes which enables wikis to wikify
		// the make, model and software name to link to their articles.
		case 'Make':
		case 'Model':
			return $this->exifMsg( $tag, '', $val );

		case 'Software':
			if ( is_array( $val ) ) {
				// if its a software, version array.
				return $this->msg( 'exif-software-version-value', $val[0], $val[1] )->text();
			}

			return $this->exifMsg( $tag, '', $val );

		case 'ExposureTime':
			// Show the pretty fraction as well as decimal version
			return $this->msg( 'exif-exposuretime-format',
				$this->formatFraction( $val ), $this->formatNum( $val ) )->text();

		case 'ISOSpeedRatings':
			// If its = 65535 that means its at the
			// limit of the size of Exif::short and
			// is really higher.
			if ( $val == '65535' ) {
				return $this->exifMsg( $tag, 'overflow' );
			}

			return $this->formatNum( $val );

		case 'FNumber':
			return $this->msg( 'exif-fnumber-format',
				$this->formatNum( $val ) )->text();

		case 'FocalLength':
		case 'FocalLengthIn35mmFilm':
			return $this->msg( 'exif-focallength-format',
				$this->formatNum( $val ) )->text();

		case 'MaxApertureValue':
			if ( strpos( $val, '/' ) !== false ) {
				// need to expand this earlier to calculate fNumber
				list( $n, $d ) = explode( '/', $val );
				// The denominator comes straight from the file; never divide by zero.
				if ( is_numeric( $n ) && is_numeric( $d ) && $d != 0 ) {
					$val = $n / $d;
				}
			}
			if ( is_numeric( $val ) ) {
				$fNumber = pow( 2, $val / 2 );
				if ( $fNumber !== false ) {
					return $this->msg( 'exif-maxaperturevalue-value',
						$this->formatNum( $val ),
						$this->formatNum( $fNumber, 2 )
					)->text();
				}
			}

			return $val;

		case 'iimCategory':
			// See pg 29 of IPTC photo
			// metadata standard.
			$categories = array(
				'ace', 'clj', 'dis', 'fin', 'edu', 'evn', 'hth', 'hum', 'lab',
				'lif', 'pol', 'rel', 'sci', 'soi', 'spo', 'war', 'wea',
			);
			if ( in_array( strtolower( $val ), $categories, true ) ) {
				return $this->exifMsg( 'iimcategory', $val );
			}

			return $val;

		case 'SubjectNewsCode':
			// Essentially like iimCategory.
			// 8 (numeric) digit hierarchical
			// classification. We decode the
			// first 2 digits, which provide
			// a broad category.
			return $this->convertNewsCode( $val );

		case 'Urgency':
			// 1-8 with 1 being highest, 5 normal
			// 0 is reserved, and 9 is 'user-defined'.
			$urgency = '';
			if ( $val == 0 || $val == 9 ) {
				$urgency = 'other';
			} elseif ( $val < 5 && $val >= 1 ) {
				// 1 is included: it is the highest urgency, not an unknown value.
				$urgency = 'high';
			} elseif ( $val == 5 ) {
				$urgency = 'normal';
			} elseif ( $val <= 8 && $val > 5 ) {
				$urgency = 'low';
			}

			if ( $urgency !== '' ) {
				return $this->exifMsg( 'urgency', $urgency, $val );
			}

			return $val;

		// Things that have a unit of pixels.
		case 'OriginalImageHeight':
		case 'OriginalImageWidth':
		case 'PixelXDimension':
		case 'PixelYDimension':
		case 'ImageWidth':
		case 'ImageLength':
			return $this->formatNum( $val ) . ' ' . $this->msg( 'unit-pixel' )->text();

		// Do not transform fields with pure text.
		// For some languages the formatNum()
		// conversion results to wrong output like
		// foo,bar@example,com or foo٫bar@example٫com.
		// Also some 'numeric' things like Scene codes
		// are included here as we really don't want
		// commas inserted.
		case 'ImageDescription':
		case 'Artist':
		case 'Copyright':
		case 'RelatedSoundFile':
		case 'ImageUniqueID':
		case 'SpectralSensitivity':
		case 'GPSSatellites':
		case 'GPSVersionID':
		case 'GPSMapDatum':
		case 'Keywords':
		case 'WorldRegionDest':
		case 'CountryDest':
		case 'CountryCodeDest':
		case 'ProvinceOrStateDest':
		case 'CityDest':
		case 'SublocationDest':
		case 'WorldRegionCreated':
		case 'CountryCreated':
		case 'CountryCodeCreated':
		case 'ProvinceOrStateCreated':
		case 'CityCreated':
		case 'SublocationCreated':
		case 'ObjectName':
		case 'SpecialInstructions':
		case 'Headline':
		case 'Credit':
		case 'Source':
		case 'EditStatus':
		case 'FixtureIdentifier':
		case 'LocationDest':
		case 'LocationDestCode':
		case 'Writer':
		case 'JPEGFileComment':
		case 'iimSupplementalCategory':
		case 'OriginalTransmissionRef':
		case 'Identifier':
		case 'dc-contributor':
		case 'dc-coverage':
		case 'dc-publisher':
		case 'dc-relation':
		case 'dc-rights':
		case 'dc-source':
		case 'dc-type':
		case 'Lens':
		case 'SerialNumber':
		case 'CameraOwnerName':
		case 'Label':
		case 'Nickname':
		case 'RightsCertificate':
		case 'CopyrightOwner':
		case 'UsageTerms':
		case 'WebStatement':
		case 'OriginalDocumentID':
		case 'LicenseUrl':
		case 'MorePermissionsUrl':
		case 'AttributionUrl':
		case 'PreferredAttributionName':
		case 'PNGFileComment':
		case 'Disclaimer':
		case 'ContentWarning':
		case 'GIFFileComment':
		case 'SceneCode':
		case 'IntellectualGenre':
		case 'Event':
		case 'OrginisationInImage':
		case 'PersonInImage':
			return htmlspecialchars( $val );

		case 'ObjectCycle':
			// Non-strict on purpose, same as for the enumerated tags.
			if ( in_array( $val, array( 'a', 'p', 'b' ) ) ) {
				return $this->exifMsg( $tag, $val );
			}

			return htmlspecialchars( $val );

		case 'Rating':
			if ( $val == '-1' ) {
				return $this->exifMsg( $tag, 'rejected' );
			}

			return $this->formatNum( $val );

		case 'LanguageCode':
			$lang = Language::fetchLanguageName( strtolower( $val ), $this->getLanguage()->getCode() );

			return htmlspecialchars( $lang ? $lang : $val );

		default:
			return $this->formatNum( $val );
	}
}

/**
 * Format a "YYYY:MM:DD[ HH:MM[:SS]]" value in the user language.
 *
 * @param mixed $val Raw date value
 * @return mixed Localized date/time, or $val unchanged if the shape is not recognized
 */
private function formatDateTimeValue( $val ) {
	if ( $val == '0000:00:00 00:00:00' || $val == ' : : : : ' ) {
		return $this->msg( 'exif-unknowndate' )->text();
	}

	if ( preg_match(
		'/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D',
		$val
	) ) {
		// Full date.
		$time = wfTimestamp( TS_MW, $val );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d)$/D', $val ) ) {
		// No second field. Still format the same
		// since timeanddate doesn't include seconds anyways,
		// but second still available in api
		$time = wfTimestamp( TS_MW, $val . ':00' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->timeanddate( $time );
		}
	} elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) {
		// If only the date but not the time is filled in.
		$time = wfTimestamp( TS_MW, substr( $val, 0, 4 )
			. substr( $val, 5, 2 )
			. substr( $val, 8, 2 )
			. '000000' );
		if ( $time && intval( $time ) > 0 ) {
			return $this->getLanguage()->date( $time );
		}
	}

	// else it will just output $val without formatting it.
	return $val;
}

/**
 * Decode the bit fields of a Flash value into a comma separated list of messages.
 *
 * @param string $tag Tag name used as message prefix
 * @param mixed $val Raw flash value, treated as a bit field
 * @return string
 */
private function formatFlashValue( $tag, $val ) {
	$flashDecode = array(
		'fired' => $val & bindec( '00000001' ),
		'return' => ( $val & bindec( '00000110' ) ) >> 1,
		'mode' => ( $val & bindec( '00011000' ) ) >> 3,
		'function' => ( $val & bindec( '00100000' ) ) >> 5,
		'redeye' => ( $val & bindec( '01000000' ) ) >> 6,
		// 'reserved' => ($val & bindec( '10000000' )) >> 7,
	);
	$flashMsgs = array();
	# We do not need to handle unknown values since all are used.
	foreach ( $flashDecode as $subTag => $subValue ) {
		# We do not need any message for zeroed values.
		if ( $subTag != 'fired' && $subValue == 0 ) {
			continue;
		}
		$flashMsgs[] = $this->exifMsg( $tag . '-' . $subTag, $subValue );
	}

	return $this->getLanguage()->commaList( $flashMsgs );
}
978
/**
 * Flatten an array, using the content language for any messages.
 *
 * A temporary formatter is created whose context is a DerivativeContext
 * with its language set to $wgContLang; the actual work is done by
 * flattenArrayReal().
 *
 * @param array $vals Array of values
 * @param string $type Type of array (either lang, ul, ol).
 *   lang = language assoc array with keys being the lang code
 *   ul = unordered list, ol = ordered list
 *   type can also come from the '_type' member of $vals.
 * @param bool $noHtml If to avoid returning anything resembling HTML.
 *   (Ugly hack for backwards compatibility with old MediaWiki).
 * @param bool|IContextSource $context Base context to derive from (optional)
 * @return string Single value (in wiki-syntax).
 * @since 1.23
 */
public static function flattenArrayContentLang( $vals, $type = 'ul',
	$noHtml = false, $context = false
) {
	global $wgContLang;

	$formatter = new self();
	if ( $context ) {
		$formatter->setContext( $context );
	}

	// Derive from whatever context the formatter has now and override only the language.
	$contentLangContext = new DerivativeContext( $formatter->getContext() );
	$contentLangContext->setLanguage( $wgContLang );
	$formatter->setContext( $contentLangContext );

	return $formatter->flattenArrayReal( $vals, $type, $noHtml );
}
1007
/**
 * A function to collapse multivalued tags into a single value.
 * This turns an array of (for example) authors into a bulleted list.
 *
 * This is public on the basis it might be useful outside of this class.
 *
 * Non-array input is returned untouched, a one-element array that is not
 * of type 'lang' is unwrapped to its element, and an empty array yields "".
 * When single-language mode is on (see setSingleLanguage()), a 'lang'
 * array is reduced to the first item found instead of a list.
 *
 * @param array $vals Array of values
 * @param string $type Type of array (either lang, ul, ol).
 *   lang = language assoc array with keys being the lang code
 *   ul = unordered list, ol = ordered list
 *   type can also come from the '_type' member of $vals.
 * @param bool $noHtml If to avoid returning anything resembling HTML.
 *   (Ugly hack for backwards compatibility with old mediawiki).
 * @return string Single value (in wiki-syntax).
 * @since 1.23
 */
public function flattenArrayReal( $vals, $type = 'ul', $noHtml = false ) {
	if ( !is_array( $vals ) ) {
		return $vals; // do nothing if not an array;
	}

	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	}

	if ( count( $vals ) === 1 && $type !== 'lang' ) {
		return $vals[0];
	}

	if ( count( $vals ) === 0 ) {
		wfDebug( __METHOD__ . " metadata array with 0 elements!\n" );

		return ""; // paranoia. This should never happen
	}

	/* @todo FIXME: This should hide some of the list entries if there are
	 * say more than four. Especially if a field is translated into 20
	 * languages, we don't want to show them all by default
	 */
	switch ( $type ) {
		case 'lang':
			// Display default, followed by ContLang,
			// followed by the rest in no particular
			// order.

			// Todo: hide some items if really long list.

			$content = '';

			$priorityLanguages = $this->getPriorityLanguages();
			$defaultItem = false;
			$defaultLang = false;

			// If default is set, save it for later,
			// as we don't know if it's equal to
			// one of the lang codes. (In xmp
			// you specify the language for a
			// default property by having both
			// a default prop, and one in the language
			// that are identical)
			if ( isset( $vals['x-default'] ) ) {
				$defaultItem = $vals['x-default'];
				unset( $vals['x-default'] );
			}

			foreach ( $priorityLanguages as $pLang ) {
				if ( !isset( $vals[$pLang] ) ) {
					continue;
				}

				// Keep a copy: the entry is removed from $vals so the
				// "rest" loop below does not output it a second time.
				$item = $vals[$pLang];
				unset( $vals[$pLang] );

				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						array( 'lang' => $pLang ), $item );
				}

				$isDefault = false;
				if ( $item === $defaultItem ) {
					$defaultItem = false;
					$isDefault = true;
				}
				$content .= $this->langItem( $item, $pLang, $isDefault, $noHtml );
			}

			// Now do the rest.
			foreach ( $vals as $lang => $item ) {
				if ( $item === $defaultItem ) {
					$defaultLang = $lang;
					continue;
				}
				if ( $this->singleLang ) {
					return Html::rawElement( 'span',
						array( 'lang' => $lang ), $item );
				}
				$content .= $this->langItem( $item, $lang, false, $noHtml );
			}

			if ( $defaultItem !== false ) {
				if ( $this->singleLang ) {
					return $defaultItem;
				}
				// The default item always goes first.
				$content = $this->langItem( $defaultItem,
					$defaultLang, true, $noHtml ) .
					$content;
			}

			if ( $noHtml ) {
				return $content;
			}

			return '<ul class="metadata-langlist">' .
				$content .
				'</ul>';

		case 'ol':
			if ( $noHtml ) {
				return "\n#" . implode( "\n#", $vals );
			}

			return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';

		case 'ul':
		default:
			if ( $noHtml ) {
				return "\n*" . implode( "\n*", $vals );
			}

			return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
	}
}
1136
/**
 * Helper function for creating lists of translations.
 *
 * Builds one entry of a translation list: either the language-less
 * default entry (when $lang is false) or an entry labelled with the name
 * of the language. If no name is known for the full code, the part before
 * the first '-' is tried, and finally the raw code is used as the label.
 *
 * @param string $value Value (this is not escaped)
 * @param string|bool $lang Lang code of item or false
 * @param bool $default If it is default value.
 * @param bool $noHtml If to avoid html (for back-compat)
 * @throws MWException If $lang and $default are both false
 * @return string Language item (Note: despite how this looks, this is
 *   treated as wikitext, not as HTML).
 */
private function langItem( $value, $lang, $default = false, $noHtml = false ) {
	if ( $lang === false && $default === false ) {
		throw new MWException( '$lang and $default cannot both be false.' );
	}

	$wrappedValue = $noHtml
		? $value
		: '<span class="mw-metadata-lang-value">' . $value . '</span>';

	// Default entry that is not tied to any language.
	if ( $lang === false ) {
		$defaultText = $this->msg( 'metadata-langitem-default', $wrappedValue )->text();

		return $noHtml
			? $defaultText . "\n\n"
			: '<li class="mw-metadata-lang-default">' . $defaultText . "</li>\n";
	}

	// Otherwise a language was specified: find a display name for it.
	$lowLang = strtolower( $lang );
	$langName = Language::fetchLanguageName( $lowLang );
	if ( $langName === '' ) {
		// try just the base language name. (aka en-US -> en ).
		$codeParts = explode( '-', $lowLang, 2 );
		$langName = Language::fetchLanguageName( $codeParts[0] );
	}
	if ( $langName === '' ) {
		// give up.
		$langName = $lang;
	}

	$itemText = $this->msg( 'metadata-langitem', $wrappedValue, $langName, $lang )->text();
	if ( $noHtml ) {
		return '*' . $itemText;
	}

	$classes = 'mw-metadata-lang-code-' . $lang;
	if ( $default ) {
		$classes .= ' mw-metadata-lang-default';
	}

	return '<li class="' . $classes . '" lang="' . $lang . '">' . $itemText . "</li>\n";
}
1200
/**
 * Convenience function for getFormattedData()
 *
 * Looks up the message "exif-$tag-$val"; the message key (not the
 * returned text) is lower-cased with the content language first. An empty
 * $val selects the "exif-$tag-value" message.
 *
 * @param string $tag The tag name to pass on
 * @param string $val The value of the tag
 * @param string $arg An argument to pass ($1)
 * @param string $arg2 A 2nd argument to pass ($2)
 * @return string The text content of the message
 */
private function exifMsg( $tag, $val, $arg = null, $arg2 = null ) {
	global $wgContLang;

	$suffix = ( $val === '' ) ? 'value' : $val;
	$messageKey = $wgContLang->lc( "exif-$tag-$suffix" );

	return $this->msg( $messageKey, $arg, $arg2 )->text();
}
1219
/**
 * Format a number, convert numbers from fractions into floating point
 * numbers, joins arrays of numbers with commas.
 *
 * Strings of the form "a/b" are divided out unless b is zero, in which
 * case the string is handed to the language formatter unchanged.
 *
 * @param mixed $num The value to format
 * @param float|int|bool $round Digits to round to or false. Applied to
 *   every element when $num is an array.
 * @return mixed A floating point number or whatever we were fed
 */
private function formatNum( $num, $round = false ) {
	if ( is_array( $num ) ) {
		$out = array();
		foreach ( $num as $number ) {
			// Pass $round along so array elements are rounded like scalars.
			$out[] = $this->formatNum( $number, $round );
		}

		return $this->getLanguage()->commaList( $out );
	}

	$m = array();
	if ( preg_match( '/^(-?\d+)\/(\d+)$/', $num, $m ) ) {
		if ( $m[2] != 0 ) {
			$newNum = $m[1] / $m[2];
			if ( $round !== false ) {
				$newNum = round( $newNum, $round );
			}
		} else {
			// Cannot divide by zero; show the fraction as given.
			$newNum = $num;
		}

		return $this->getLanguage()->formatNum( $newNum );
	}

	if ( is_numeric( $num ) && $round !== false ) {
		$num = round( $num, $round );
	}

	return $this->getLanguage()->formatNum( $num );
}
1257
/**
 * Format a rational number written as an "a/b" string in lowest terms.
 *
 * Input that is not such a string is handed to formatNum() unchanged.
 *
 * @param mixed $num The value to format
 * @return mixed The reduced fraction, or the formatNum() result for other input
 */
private function formatFraction( $num ) {
	$parts = array();
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		return $this->formatNum( $num );
	}

	$top = intval( $parts[1] );
	$bottom = intval( $parts[2] );
	$divisor = $this->gcd( abs( $top ), $bottom );
	if ( $divisor == 0 ) {
		// Nothing to divide by (both parts are zero); fall back to formatNum().
		return $this->formatNum( $num );
	}

	return $this->formatNum( $top / $divisor ) . '/' . $this->formatNum( $bottom / $divisor );
}
1278
/**
 * Calculate the greatest common divisor of two integers with the
 * iterative form of the Euclidean algorithm.
 *
 * @see https://en.wikipedia.org/wiki/Euclidean_algorithm
 * @param int $a Numerator
 * @param int $b Denominator
 * @return int The greatest common divisor; $a itself when $b is 0
 */
private function gcd( $a, $b ) {
	// Loop equivalent of: gcd( a, b ) = ( b == 0 ) ? a : gcd( b, a % b )
	while ( $b != 0 ) {
		list( $a, $b ) = array( $b, $a % $b );
	}

	return $a;
}
1305
/**
 * Fetch the human readable version of a news code.
 *
 * A news code is an 8 digit code. The first two digits are a general
 * classification, and only that part is translated.
 *
 * Note, leading 0's are significant, so this is a string, not an int.
 *
 * @param string $val The 8 digit news code.
 * @return string The human readable form, or $val unchanged when it is not
 *   8 digits or its classification is not one of the known ones
 */
private function convertNewsCode( $val ) {
	if ( !preg_match( '/^\d{8}$/D', $val ) ) {
		// Not a valid news code.
		return $val;
	}

	// First two digits => last part of the "exif-iimcategory-*" message key
	$categories = array(
		'01' => 'ace',
		'02' => 'clj',
		'03' => 'dis',
		'04' => 'fin',
		'05' => 'edu',
		'06' => 'evn',
		'07' => 'hth',
		'08' => 'hum',
		'09' => 'lab',
		'10' => 'lif',
		'11' => 'pol',
		'12' => 'rel',
		'13' => 'sci',
		'14' => 'soi',
		'15' => 'spo',
		'16' => 'war',
		'17' => 'wea',
	);

	$prefix = substr( $val, 0, 2 );
	if ( !isset( $categories[$prefix] ) ) {
		// Unknown classification: leave the code as it is.
		return $val;
	}

	$catMsg = $this->exifMsg( 'iimcategory', $categories[$prefix] );

	return $this->exifMsg( 'subjectnewscode', '', $val, $catMsg );
}
1384
/**
 * Format a coordinate value: convert a decimal-degree number into a
 * degree / minute / second representation.
 *
 * @param float|int $coord Coordinate in decimal degrees; a negative value
 *   is reported as south (latitude) or west (longitude)
 * @param string $type 'latitude' or 'longitude'; any other value leaves the
 *   direction letter empty
 * @return string Text of the exif-coordinate-format message
 */
private function formatCoords( $coord, $type ) {
	$isNegative = $coord < 0;
	$nCoord = $isNegative ? -$coord : $coord;

	$ref = '';
	if ( $type === 'latitude' ) {
		$ref = $isNegative ? 'S' : 'N';
	} elseif ( $type === 'longitude' ) {
		$ref = $isNegative ? 'W' : 'E';
	}

	$deg = floor( $nCoord );
	$min = floor( ( $nCoord - $deg ) * 60.0 );
	$sec = round( ( ( $nCoord - $deg ) - $min / 60 ) * 3600, 2 );

	// Floating point error can leave the minutes one short and make the
	// seconds round up to a full 60 (10.2 would come out as 10° 11′ 60″).
	// Carry the overflow into the next larger unit.
	if ( $sec >= 60 ) {
		$sec -= 60;
		$min++;
	}
	if ( $min >= 60 ) {
		$min -= 60;
		$deg++;
	}

	$deg = $this->formatNum( $deg );
	$min = $this->formatNum( $min );
	$sec = $this->formatNum( $sec );

	return $this->msg( 'exif-coordinate-format', $deg, $min, $sec, $ref, $coord )->text();
}
1421
/**
 * Format the contact info field into a single value.
 *
 * This function might be called from
 * JpegHandler::convertMetadataVersion which is why it is
 * public.
 *
 * @param array $vals Array with fields of the ContactInfo
 *    struct defined in the IPTC4XMP spec. Or potentially
 *    an array with one element that is a free form text
 *    value from the older iptc iim 1:118 prop.
 * @return string HTML-ish looking wikitext
 * @since 1.23 no longer static
 */
public function collapseContactInfo( $vals ) {
	// Sub-property => class of the <span> its escaped value is wrapped in.
	// Some class names are similar to those used by hCard, mostly because
	// they're sensible names. This does not (and does not attempt to)
	// output the hCard microformat; it might output the adr microformat.
	$spanClasses = array(
		// Todo: This can potentially be multi-line.
		// Need to check how that works in XMP.
		'CiAdrExtadr' => 'extended-address',
		'CiAdrCity' => 'locality',
		'CiAdrCtry' => 'country-name',
		'CiTelWork' => 'tel',
		'CiAdrPcode' => 'postal-code',
		// Note this is province/state.
		'CiAdrRegion' => 'region',
		'CiUrlWork' => 'url',
	);

	// Its unclear if all these fields have to be set, so assume they do not:
	// a missing field becomes an empty string.
	$hasSubProperty = isset( $vals['CiEmailWork'] );
	$spans = array();
	foreach ( $spanClasses as $field => $class ) {
		if ( isset( $vals[$field] ) ) {
			$hasSubProperty = true;
			$spans[$field] = '<span class="' . $class . '">'
				. htmlspecialchars( $vals[$field] )
				. '</span>';
		} else {
			$spans[$field] = '';
		}
	}

	if ( !$hasSubProperty ) {
		// We don't have any sub-properties. This could happen if its using
		// old iptc that just had this as a free-form text value.
		// Note: We run this through htmlspecialchars partially to be
		// consistent, and partially because people often insert >, etc
		// into the metadata which should not be interpreted but we still
		// want to auto-link urls.
		foreach ( $vals as $index => $text ) {
			$vals[$index] = htmlspecialchars( $text );
		}

		return $this->flattenArrayReal( $vals );
	}

	// We have a real ContactInfo field.
	$email = '';
	if ( isset( $vals['CiEmailWork'] ) ) {
		$links = array();
		// Multiple emails are separated by commas and/or new lines.
		$pieces = explode( ',', str_replace( "\n", ',', $vals['CiEmailWork'] ) );
		foreach ( $pieces as $piece ) {
			$address = trim( $piece );
			if ( $address === '' ) {
				continue;
			}
			if ( strpos( $address, '<' ) !== false ) {
				// Don't do fancy formatting to
				// "My name" <foo@bar.com> style stuff
				$links[] = $address;
			} else {
				$links[] = '[mailto:' . $address
					. ' <span class="email">' . $address . '</span>]';
			}
		}
		$email = implode( ', ', $links );
	}

	return $this->msg(
		'exif-contact-value',
		$email,
		$spans['CiUrlWork'],
		$spans['CiAdrExtadr'],
		$spans['CiAdrCity'],
		$spans['CiAdrRegion'],
		$spans['CiAdrPcode'],
		$spans['CiAdrCtry'],
		$spans['CiTelWork']
	)->text();
}
1547
/**
 * Get a list of fields that are visible by default.
 *
 * The list is read from the 'metadata-fields' message in the content
 * language: every line that starts with "*" contributes one field name,
 * trimmed of surrounding whitespace and lower-cased.
 *
 * @return array Lower-case field names
 * @since 1.23
 */
public static function getVisibleFields() {
	$listText = wfMessage( 'metadata-fields' )->inContentLanguage()->text();

	$fields = array();
	foreach ( explode( "\n", $listText ) as $line ) {
		$matches = array();
		if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) {
			$fields[] = strtolower( $matches[1] );
		}
	}

	return $fields;
}
1567
/**
 * Get an array of extended metadata. (See the imageinfo API for format.)
 *
 * The result is cached per language code, single-language flag and file
 * sha1. A cached entry is only used when the ValidateExtendedMetadataCache
 * hook accepts it.
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
public function fetchExtendedMetadata( File $file ) {
	global $wgMemc;

	// If revision deleted, exit immediately
	if ( $file->isDeleted( File::DELETED_FILE ) ) {
		return array();
	}

	$cacheKey = wfMemcKey(
		'getExtendedMetadata',
		$this->getLanguage()->getCode(),
		(int)$this->singleLang,
		$file->getSha1()
	);

	$cached = $wgMemc->get( $cacheKey );
	if (
		$cached
		&& Hooks::run( 'ValidateExtendedMetadataCache', array( $cached['timestamp'], $file ) )
	) {
		return $cached['data'];
	}

	// Cache miss (or rejected entry): compute from scratch.
	// 12 hours for ForeignAPIFile, 30 days otherwise; the hook in
	// getExtendedMetadataFromHook() receives this by reference.
	$maxCacheTime = 60 * 60 * 24 * 30;
	if ( $file instanceof ForeignAPIFile ) {
		$maxCacheTime = 60 * 60 * 12;
	}

	$extendedMetadata = $this->getExtendedMetadataFromHook(
		$file,
		$this->getExtendedMetadataFromFile( $file ),
		$maxCacheTime
	);
	if ( $this->singleLang ) {
		$this->resolveMultilangMetadata( $extendedMetadata );
	}
	$this->discardMultipleValues( $extendedMetadata );

	// Make sure the metadata won't break the API when an XML format is used.
	// This is an API-specific function so it would be cleaner to call it from
	// outside fetchExtendedMetadata, but this way we don't need to redo the
	// computation on a cache hit.
	$this->sanitizeArrayForAPI( $extendedMetadata );

	$wgMemc->set(
		$cacheKey,
		array( 'data' => $extendedMetadata, 'timestamp' => wfTimestampNow() ),
		$maxCacheTime
	);

	return $extendedMetadata;
}
1615
/**
 * Get file-based metadata in standardized format.
 *
 * Note that for a remote file, this might return metadata supplied by extensions.
 *
 * For other files the result holds 'DateTime' (the file timestamp in
 * ISO 8601 form) and, when the file has a title, 'ObjectName' (the title
 * text without its last ".extension").
 *
 * @param File $file File to use
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromFile( File $file ) {
	// If this is a remote file accessed via an API request, we already
	// have remote metadata so we just ignore any local one
	if ( $file instanceof ForeignAPIFile ) {
		// In case of error we pretend no metadata - this will get cached.
		// Might or might not be a good idea.
		$remote = $file->getExtendedMetadata();

		return $remote ? $remote : array();
	}

	$fileMetadata = array(
		// This is modification time, which is close to "upload" time.
		'DateTime' => array(
			'value' => wfTimestamp( TS_ISO_8601, $file->getTimestamp() ),
			'source' => 'mediawiki-metadata',
		),
	);

	$title = $file->getTitle();
	if ( $title ) {
		$text = $title->getText();
		// A missing dot and a dot at position 0 both leave the text whole.
		$dot = strrpos( $text, '.' );

		$fileMetadata['ObjectName'] = array(
			'value' => $dot ? substr( $text, 0, $dot ) : $text,
			'source' => 'mediawiki-metadata',
		);
	}

	return $fileMetadata;
}
1663
/**
 * Get additional metadata from hooks in standardized format.
 *
 * Runs the GetExtendedMetadata hook on the given metadata, then marks every
 * field that is not in getVisibleFields() with an empty 'hidden' entry.
 *
 * @param File $file File to use
 * @param array $extendedMetadata Metadata collected so far
 * @param int $maxCacheTime Hook handlers might use this parameter to override cache time
 *
 * @return array [<property name> => ['value' => <value>]], or [] on error
 * @since 1.23
 */
protected function getExtendedMetadataFromHook( File $file, array $extendedMetadata,
	&$maxCacheTime
) {
	$hookArgs = array(
		&$extendedMetadata,
		$file,
		$this->getContext(),
		$this->singleLang,
		&$maxCacheTime
	);
	Hooks::run( 'GetExtendedMetadata', $hookArgs );

	// Flipped so that field names are keys and membership is an isset().
	$visible = array_flip( self::getVisibleFields() );
	foreach ( $extendedMetadata as $name => $unused ) {
		if ( isset( $visible[strtolower( $name )] ) ) {
			continue;
		}
		$extendedMetadata[$name]['hidden'] = '';
	}

	return $extendedMetadata;
}
1695
/**
 * Turns an XMP-style multilang array into a single value.
 * If the value is not a multilang array, it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * Order of preference: the languages from getPriorityLanguages(), then
 * 'x-default', then whichever language comes first in the array.
 *
 * @param mixed $value
 * @return mixed Value in best language, null if there were no languages at all
 * @since 1.23
 */
protected function resolveMultilangValue( $value ) {
	$isMultilang = is_array( $value )
		&& isset( $value['_type'] )
		&& $value['_type'] == 'lang';
	if ( !$isMultilang ) {
		return $value; // do nothing if not a multilang array
	}

	// Languages best matching user or site settings first, the default
	// language (if set) as the last named candidate.
	$candidates = $this->getPriorityLanguages();
	$candidates[] = 'x-default';
	foreach ( $candidates as $code ) {
		if ( isset( $value[$code] ) ) {
			return $value[$code];
		}
	}

	// Otherwise just return any one language. Ending up with nothing
	// should not happen; null signals that error.
	unset( $value['_type'] );

	return $value ? reset( $value ) : null;
}
1735
/**
 * Turns an XMP-style multivalue array into a single value by dropping all but the first
 * value. If the value is not a multivalue array (or a multivalue array inside a multilang
 * array), it is returned unchanged.
 * See mediawiki.org/wiki/Manual:File_metadata_handling#Multi-language_array_format
 *
 * @param mixed $value
 * @return mixed The value, or the first value if there were multiple ones.
 *   For a multilang array, the array with each member reduced this way.
 *   An empty array gives null; an array holding nothing but '_type' gives false.
 * @since 1.25
 */
protected function resolveMultivalueValue( $value ) {
	if ( !is_array( $value ) ) {
		return $value;
	}

	if ( isset( $value['_type'] ) && $value['_type'] === 'lang' ) {
		// if this is a multilang array, process fields separately
		$newValue = array();
		foreach ( $value as $k => $v ) {
			$newValue[$k] = $this->resolveMultivalueValue( $v );
		}

		return $newValue;
	}

	// _type is 'ul' or 'ol' or missing in which case it defaults to 'ul'
	if ( !$value ) {
		return null;
	}

	// each() is deprecated since PHP 7.2 and removed in PHP 8.0. reset() also
	// makes the result independent of where the array's internal pointer was.
	$first = reset( $value );
	if ( key( $value ) === '_type' ) {
		// The first entry is only the type marker; take the one after it.
		$first = next( $value );
	}

	return $first;
}
1763
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and resolves multi-language values in it.
 *
 * The array is changed in place; anything that is not an array is left alone.
 *
 * @param array $metadata
 * @since 1.23
 */
protected function resolveMultilangMetadata( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	foreach ( array_keys( $metadata ) as $name ) {
		if ( !isset( $metadata[$name]['value'] ) ) {
			continue;
		}
		$metadata[$name]['value'] = $this->resolveMultilangValue( $metadata[$name]['value'] );
	}
}
1780
/**
 * Takes an array returned by the getExtendedMetadata* functions,
 * and turns all fields into single-valued ones by dropping extra values.
 *
 * The array is changed in place; anything that is not an array is left alone.
 *
 * @param array $metadata
 * @since 1.25
 */
protected function discardMultipleValues( &$metadata ) {
	if ( !is_array( $metadata ) ) {
		return;
	}

	foreach ( array_keys( $metadata ) as $name ) {
		// we skip some fields which have composite values. They are not particularly
		// interesting and you can get them via the metadata / commonmetadata APIs anyway.
		$isComposite = ( $name === 'Software' || $name === 'Contact' );
		if ( $isComposite || !isset( $metadata[$name]['value'] ) ) {
			continue;
		}
		$metadata[$name]['value'] = $this->resolveMultivalueValue( $metadata[$name]['value'] );
	}
}
1803
/**
 * Makes sure the given array is a valid API response fragment.
 *
 * Every key is run through sanitizeKeyForAPI() and nested arrays are
 * processed recursively. The array is changed in place; anything that is
 * not an array is left alone.
 *
 * @param array $arr
 */
protected function sanitizeArrayForAPI( &$arr ) {
	if ( !is_array( $arr ) ) {
		return;
	}

	$suffix = 1;
	// NOTE(review): the order of statements in this loop matters. A renamed
	// entry is copied under its new key while $entry still points at the old
	// slot, so the recursion below does not act on the copy. The copy
	// presumably gets sanitized when the by-reference loop reaches the
	// appended element - confirm before restructuring.
	foreach ( $arr as $rawKey => &$entry ) {
		$cleanKey = $this->sanitizeKeyForAPI( $rawKey );
		if ( $cleanKey !== $rawKey ) {
			if ( isset( $arr[$cleanKey] ) ) {
				// Make the sanitized keys hopefully unique.
				// To make it definitely unique would be too much effort, given that
				// sanitizing is only needed for misformatted metadata anyway, but
				// this at least covers the case when $arr is numeric.
				$cleanKey .= $suffix;
				$suffix++;
			}
			$arr[$cleanKey] = $arr[$rawKey];
			unset( $arr[$rawKey] );
		}
		if ( is_array( $entry ) ) {
			$this->sanitizeArrayForAPI( $entry );
		}
	}

	// Handle API metadata keys (particularly "_type")
	$metadataKeys = array_filter( array_keys( $arr ), 'ApiResult::isMetadataKey' );
	if ( $metadataKeys ) {
		ApiResult::setPreserveKeysList( $arr, $metadataKeys );
	}
}
1839
/**
 * Turns a string into a valid API identifier.
 *
 * Only ASCII letters, digits and "_", ":", ".", "-" are kept, and leading
 * digits, "-" and "." are removed. If nothing is left the result is "_".
 * The internal keyword "_element" is turned into "element".
 *
 * @param string $key
 * @return string
 * @since 1.23
 */
protected function sanitizeKeyForAPI( $key ) {
	// drop all characters which are not valid in an XML tag name
	// a bunch of non-ASCII letters would be valid but probably won't
	// be used so we take the easy way.
	// The range has to be A-Z: "A-z" would also let [ \ ] ^ and ` through.
	$key = preg_replace( '/[^a-zA-Z0-9_:.\-]/', '', $key );
	// drop characters which are invalid at the first position.
	// The hyphen is escaped because PCRE2 (PHP 7.3+) rejects "\d-." as an
	// invalid range, which would turn every key into "_".
	$key = preg_replace( '/^[\d\-.]+/', '', $key );

	// Loose comparison on purpose: also catches a null from a failed preg_replace().
	if ( $key == '' ) {
		$key = '_';
	}

	// special case for an internal keyword
	if ( $key === '_element' ) {
		$key = 'element';
	}

	return $key;
}
1865
/**
 * Returns a list of languages (first is best) to use when formatting multilang fields,
 * based on user and site preferences.
 *
 * The list starts with the code of the current language, followed by the
 * two lists returned by Language::getFallbacksIncludingSiteLanguage().
 *
 * @return array Language codes; may contain duplicates
 * @since 1.23
 */
protected function getPriorityLanguages() {
	$code = $this->getLanguage()->getCode();
	$fallbacks = Language::getFallbacksIncludingSiteLanguage( $code );

	return array_merge( (array)$code, $fallbacks[0], $fallbacks[1] );
}
1883 }