*/
/**
-* Class for reading xmp data containing properties relevant to
-* images, and spitting out an array that FormatExif accepts.
-*
-* Note, this is not meant to recognize every possible thing you can
-* encode in XMP. It should recognize all the properties we want.
-* For example it doesn't have support for structures with multiple
-* nesting levels, as none of the properties we're supporting use that
-* feature. If it comes across properties it doesn't recognize, it should
-* ignore them.
-*
-* The public methods one would call in this class are
-* - parse( $content )
-* Reads in xmp content.
-* Can potentially be called multiple times with partial data each time.
-* - parseExtended( $content )
-* Reads XMPExtended blocks (jpeg files only).
-* - getResults
-* Outputs a results array.
-*
-* Note XMP kind of looks like rdf. They are not the same thing - XMP is
-* encoded as a specific subset of rdf. This class can read XMP. It cannot
-* read rdf.
-*
-*/
+ * Class for reading xmp data containing properties relevant to
+ * images, and spitting out an array that FormatExif accepts.
+ *
+ * Note, this is not meant to recognize every possible thing you can
+ * encode in XMP. It should recognize all the properties we want.
+ * For example it doesn't have support for structures with multiple
+ * nesting levels, as none of the properties we're supporting use that
+ * feature. If it comes across properties it doesn't recognize, it should
+ * ignore them.
+ *
+ * The public methods one would call in this class are
+ * - parse( $content )
+ * Reads in xmp content.
+ * Can potentially be called multiple times with partial data each time.
+ * - parseExtended( $content )
+ * Reads XMPExtended blocks (jpeg files only).
+ * - getResults
+ * Outputs a results array.
+ *
+ * Note XMP kind of looks like rdf. They are not the same thing - XMP is
+ * encoded as a specific subset of rdf. This class can read XMP. It cannot
+ * read rdf.
+ *
+ */
class XMPReader {
private $curItem = array(); // array to hold the current element (and previous element, and so on)
protected $items;
/**
- * These are various mode constants.
- * they are used to figure out what to do
- * with an element when its encountered.
- *
- * For example, MODE_IGNORE is used when processing
- * a property we're not interested in. So if a new
- * element pops up when we're in that mode, we ignore it.
- */
+ * These are various mode constants.
+ * they are used to figure out what to do
+ * with an element when it's encountered.
+ *
+ * For example, MODE_IGNORE is used when processing
+ * a property we're not interested in. So if a new
+ * element pops up when we're in that mode, we ignore it.
+ */
const MODE_INITIAL = 0;
- const MODE_IGNORE = 1;
- const MODE_LI = 2;
+ const MODE_IGNORE = 1;
+ const MODE_LI = 2;
const MODE_LI_LANG = 3;
- const MODE_QDESC = 4;
+ const MODE_QDESC = 4;
// The following MODE constants are also used in the
// $items array to denote what type of property the item is.
- const MODE_SIMPLE = 10;
- const MODE_STRUCT = 11; // structure (associative array)
- const MODE_SEQ = 12; // ordered list
- const MODE_BAG = 13; // unordered list
- const MODE_LANG = 14;
- const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
+ const MODE_SIMPLE = 10;
+ const MODE_STRUCT = 11; // structure (associative array)
+ const MODE_SEQ = 12; // ordered list
+ const MODE_BAG = 13; // unordered list
+ const MODE_LANG = 14;
+ const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm.
const MODE_BAGSTRUCT = 16; // A BAG of Structs.
const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
const NS_XML = 'http://www.w3.org/XML/1998/namespace';
-
/**
- * Constructor.
- *
- * Primary job is to initialize the XMLParser
- */
+ * Constructor.
+ *
+ * Primary job is to initialize the XMLParser
+ */
function __construct() {
if ( !function_exists( 'xml_parser_create_ns' ) ) {
}
/**
- * Main use is if a single item has multiple xmp documents describing it.
- * For example in jpeg's with extendedXMP
- */
+ * Main use is if a single item has multiple xmp documents describing it.
+ * For example, in JPEGs with ExtendedXMP.
+ */
private function resetXMLParser() {
if ( $this->xmlParser ) {
}
/** Destroy the xml parser
- *
- * Not sure if this is actually needed.
- */
+ *
+ * Not sure if this is actually needed.
+ */
function __destruct() {
	// Only free a parser that actually exists: resetXMLParser() applies the
	// same truthiness check to this property before touching it, and passing
	// an empty value to xml_parser_free() would raise an error mid-teardown.
	// NOTE(review): the PHP manual describes xml_parser_free() as having no
	// effect on PHP >= 8.0, so this presumably only matters on older
	// runtimes - confirm before removing.
	if ( $this->xmlParser ) {
		xml_parser_free( $this->xmlParser );
	}
}
/** Get the result array. Do some post-processing before returning
- * the array, and transform any metadata that is special-cased.
- *
- * @return Array array of results as an array of arrays suitable for
- * FormatMetadata::getFormattedData().
- */
+ * the array, and transform any metadata that is special-cased.
+ *
+ * @return Array array of results as an array of arrays suitable for
+ * FormatMetadata::getFormattedData().
+ */
public function getResults() {
// xmp-special is for metadata that affects how stuff
// is extracted. For example xmpNote:HasExtendedXMP.
}
}
-
// We don't want to return the special values, since they're
// special and not info to be stored about the file.
unset( $data['xmp-special'] );
* Also catches any errors during processing, writes them to
* debug log, blanks result array and returns false.
*
- * @param $content String: XMP data
+ * @param string $content XMP data
* @param $allOfIt Boolean: If this is all the data (true) or if it's split up (false). Default true
* @param $reset Boolean: does xml parser need to be reset. Default false
* @throws MWException
default:
//this should be impossible to get to
throw new MWException( "Invalid BOM" );
- break;
-
}
-
} else {
// standard specifically says, if no bom assume utf-8
$this->charset = 'UTF-8';
*
* @todo In serious need of testing
* @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20
- * @param String $content XMPExtended block minus the namespace signature
+ * @param string $content XMPExtended block minus the namespace signature
* @return Boolean If it succeeded.
*/
public function parseExtended( $content ) {
return false;
}
-
// we're not very robust here. we should accept it in the wrong order. To quote
// the xmp standard:
// "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the
}
/**
- * Character data handler
- * Called whenever character data is found in the xmp document.
- *
- * does nothing if we're in MODE_IGNORE or if the data is whitespace
- * throws an error if we're not in MODE_SIMPLE (as we're not allowed to have character
- * data in the other modes).
- *
- * As an example, this happens when we encounter XMP like:
- * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
- * and are processing the 0/10 bit.
- *
- * @param $parser XMLParser reference to the xml parser
- * @param $data String Character data
- * @throws MWException on invalid data
- */
+ * Character data handler
+ * Called whenever character data is found in the xmp document.
+ *
+ * does nothing if we're in MODE_IGNORE or if the data is whitespace
+ * throws an error if we're not in MODE_SIMPLE (as we're not allowed to have character
+ * data in the other modes).
+ *
+ * As an example, this happens when we encounter XMP like:
+ * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
+ * and are processing the 0/10 bit.
+ *
+ * @param $parser XMLParser reference to the xml parser
+ * @param string $data Character data
+ * @throws MWException on invalid data
+ */
function char( $parser, $data ) {
$data = trim( $data );
}
/** When we hit a closing element in MODE_IGNORE
- * Check to see if this is the element we started to ignore,
- * in which case we get out of MODE_IGNORE
- *
- * @param $elm String Namespace of element followed by a space and then tag name of element.
- */
- private function endElementModeIgnore ( $elm ) {
-
+ * Check to see if this is the element we started to ignore,
+ * in which case we get out of MODE_IGNORE
+ *
+ * @param string $elm Namespace of element followed by a space and then tag name of element.
+ */
+ private function endElementModeIgnore( $elm ) {
if ( $this->curItem[0] === $elm ) {
array_shift( $this->curItem );
array_shift( $this->mode );
}
- return;
-
}
/**
- * Hit a closing element when in MODE_SIMPLE.
- * This generally means that we finished processing a
- * property value, and now have to save the result to the
- * results array
- *
- * For example, when processing:
- * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
- * this deals with when we hit </exif:DigitalZoomRatio>.
- *
- * Or it could be if we hit the end element of a property
- * of a compound data structure (like a member of an array).
- *
- * @param $elm String namespace, space, and tag name.
- */
- private function endElementModeSimple ( $elm ) {
+ * Hit a closing element when in MODE_SIMPLE.
+ * This generally means that we finished processing a
+ * property value, and now have to save the result to the
+ * results array
+ *
+ * For example, when processing:
+ * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
+ * this deals with when we hit </exif:DigitalZoomRatio>.
+ *
+ * Or it could be if we hit the end element of a property
+ * of a compound data structure (like a member of an array).
+ *
+ * @param string $elm namespace, space, and tag name.
+ */
+ private function endElementModeSimple( $elm ) {
if ( $this->charContent !== false ) {
if ( $this->processingArray ) {
// if we're processing an array, use the original element
*
* This method is called when we hit the "</exif:ISOSpeedRatings>" tag.
*
- * @param $elm String namespace . space . tag name.
+ * @param string $elm namespace . space . tag name.
* @throws MWException
*/
private function endElementNested( $elm ) {
* (For comparison, we call endElementModeSimple when we
* hit the "</rdf:li>")
*
- * @param $elm String namespace . ' ' . element name
+ * @param string $elm namespace . ' ' . element name
* @throws MWException
*/
private function endElementModeLi( $elm ) {
}
/**
- * End element while in MODE_QDESC
- * mostly when ending an element when we have a simple value
- * that has qualifiers.
- *
- * Qualifiers aren't all that common, and we don't do anything
- * with them.
- *
- * @param $elm String namespace and element
- */
+ * End element while in MODE_QDESC
+ * mostly when ending an element when we have a simple value
+ * that has qualifiers.
+ *
+ * Qualifiers aren't all that common, and we don't do anything
+ * with them.
+ *
+ * @param string $elm namespace and element
+ */
private function endElementModeQDesc( $elm ) {
if ( $elm === self::NS_RDF . ' value' ) {
array_shift( $this->mode );
array_shift( $this->curItem );
}
-
-
}
/**
* xmp and have no meaning.
*
* @param $parser XMLParser
- * @param $elm String namespace . ' ' . element name
+ * @param string $elm namespace . ' ' . element name
* @throws MWException
*/
function endElement( $parser, $elm ) {
}
/**
- * Hit an opening element while in MODE_IGNORE
- *
- * XMP is extensible, so ignore any tag we don't understand.
- *
- * Mostly ignores, unless we encounter the element that we are ignoring.
- * in which case we add it to the item stack, so we can ignore things
- * that are nested, correctly.
- *
- * @param $elm String namespace . ' ' . tag name
- */
+ * Hit an opening element while in MODE_IGNORE
+ *
+ * XMP is extensible, so ignore any tag we don't understand.
+ *
+ * Mostly ignores, unless we encounter the element that we are ignoring,
+ * in which case we add it to the item stack, so we can correctly ignore
+ * things that are nested.
+ *
+ * @param string $elm namespace . ' ' . tag name
+ */
private function startElementModeIgnore( $elm ) {
if ( $elm === $this->curItem[0] ) {
array_unshift( $this->curItem, $elm );
}
/**
- * Start element in MODE_BAG (unordered array)
- * this should always be <rdf:Bag>
- *
- * @param $elm String namespace . ' ' . tag
- * @throws MWException if we have an element that's not <rdf:Bag>
- */
+ * Start element in MODE_BAG (unordered array)
+ * this should always be <rdf:Bag>
+ *
+ * @param string $elm namespace . ' ' . tag
+ * @throws MWException if we have an element that's not <rdf:Bag>
+ */
private function startElementModeBag( $elm ) {
if ( $elm === self::NS_RDF . ' Bag' ) {
array_unshift( $this->mode, self::MODE_LI );
}
/**
- * Start element in MODE_SEQ (ordered array)
- * this should always be <rdf:Seq>
- *
- * @param $elm String namespace . ' ' . tag
- * @throws MWException if we have an element that's not <rdf:Seq>
- */
+ * Start element in MODE_SEQ (ordered array)
+ * this should always be <rdf:Seq>
+ *
+ * @param string $elm namespace . ' ' . tag
+ * @throws MWException if we have an element that's not <rdf:Seq>
+ */
private function startElementModeSeq( $elm ) {
if ( $elm === self::NS_RDF . ' Seq' ) {
array_unshift( $this->mode, self::MODE_LI );
}
/**
- * Start element in MODE_LANG (language alternative)
- * this should always be <rdf:Alt>
- *
- * This tag tends to be used for metadata like describe this
- * picture, which can be translated into multiple languages.
- *
- * XMP supports non-linguistic alternative selections,
- * which are really only used for thumbnails, which
- * we don't care about.
- *
- * @param $elm String namespace . ' ' . tag
- * @throws MWException if we have an element that's not <rdf:Alt>
- */
+ * Start element in MODE_LANG (language alternative)
+ * this should always be <rdf:Alt>
+ *
+ * This tag tends to be used for metadata such as a description of the
+ * picture, which can be translated into multiple languages.
+ *
+ * XMP supports non-linguistic alternative selections,
+ * which are really only used for thumbnails, which
+ * we don't care about.
+ *
+ * @param string $elm namespace . ' ' . tag
+ * @throws MWException if we have an element that's not <rdf:Alt>
+ */
private function startElementModeLang( $elm ) {
if ( $elm === self::NS_RDF . ' Alt' ) {
array_unshift( $this->mode, self::MODE_LI_LANG );
*
* This method is called when processing the <rdf:Description> element
*
- * @param $elm String namespace and tag names separated by space.
- * @param $attribs Array Attributes of the element.
+ * @param string $elm namespace and tag names separated by space.
+ * @param array $attribs Attributes of the element.
* @throws MWException
*/
private function startElementModeSimple( $elm, $attribs ) {
}
/**
- * Start an element when in MODE_QDESC.
- * This generally happens when a simple element has an inner
- * rdf:Description to hold qualifier elements.
- *
- * For example in:
- * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value>
- * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description>
- * </exif:DigitalZoomRatio>
- * Called when processing the <rdf:value> or <foo:someQualifier>.
- *
- * @param $elm String namespace and tag name separated by a space.
- *
- */
+ * Start an element when in MODE_QDESC.
+ * This generally happens when a simple element has an inner
+ * rdf:Description to hold qualifier elements.
+ *
+ * For example in:
+ * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value>
+ * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description>
+ * </exif:DigitalZoomRatio>
+ * Called when processing the <rdf:value> or <foo:someQualifier>.
+ *
+ * @param string $elm namespace and tag name separated by a space.
+ *
+ */
private function startElementModeQDesc( $elm ) {
if ( $elm === self::NS_RDF . ' value' ) {
return; // do nothing
*
* This is generally where most properties start.
*
- * @param $ns String Namespace
- * @param $tag String tag name (without namespace prefix)
- * @param $attribs Array array of attributes
+ * @param string $ns Namespace
+ * @param string $tag tag name (without namespace prefix)
+ * @param array $attribs array of attributes
* @throws MWException
*/
private function startElementModeInitial( $ns, $tag, $attribs ) {
* <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired>
* <exif:Mode>1</exif:Mode></exif:Flash>
*
- * @param $ns String namespace
- * @param $tag String tag name (no ns)
- * @param $attribs Array array of attribs w/ values.
+ * @param string $ns namespace
+ * @param string $tag tag name (no ns)
+ * @param array $attribs array of attribs w/ values.
* @throws MWException
*/
private function startElementModeStruct( $ns, $tag, $attribs ) {
}
/**
- * opening element in MODE_LI
- * process elements of arrays.
- *
- * Example:
- * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
- * </rdf:Seq> </exif:ISOSpeedRatings>
- * This method is called when we hit the <rdf:li> element.
- *
- * @param $elm String: namespace . ' ' . tagname
- * @param $attribs Array: Attributes. (needed for BAGSTRUCTS)
- * @throws MWException if gets a tag other than <rdf:li>
- */
+ * opening element in MODE_LI
+ * process elements of arrays.
+ *
+ * Example:
+ * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+ * </rdf:Seq> </exif:ISOSpeedRatings>
+ * This method is called when we hit the <rdf:li> element.
+ *
+ * @param string $elm namespace . ' ' . tagname
+ * @param array $attribs Attributes. (needed for BAGSTRUCTS)
+ * @throws MWException if it gets a tag other than <rdf:li>
+ */
private function startElementModeLi( $elm, $attribs ) {
if ( ( $elm ) !== self::NS_RDF . ' li' ) {
throw new MWException( "<rdf:li> expected but got $elm." );
}
/**
- * Opening element in MODE_LI_LANG.
- * process elements of language alternatives
- *
- * Example:
- * <dc:title> <rdf:Alt> <rdf:li xml:lang="x-default">My house
- * </rdf:li> </rdf:Alt> </dc:title>
- *
- * This method is called when we hit the <rdf:li> element.
- *
- * @param $elm String namespace . ' ' . tag
- * @param $attribs array array of elements (most importantly xml:lang)
- * @throws MWException if gets a tag other than <rdf:li> or if no xml:lang
- */
+ * Opening element in MODE_LI_LANG.
+ * process elements of language alternatives
+ *
+ * Example:
+ * <dc:title> <rdf:Alt> <rdf:li xml:lang="x-default">My house
+ * </rdf:li> </rdf:Alt> </dc:title>
+ *
+ * This method is called when we hit the <rdf:li> element.
+ *
+ * @param string $elm namespace . ' ' . tag
+ * @param array $attribs array of elements (most importantly xml:lang)
+ * @throws MWException if it gets a tag other than <rdf:li> or if no xml:lang
+ */
private function startElementModeLiLang( $elm, $attribs ) {
if ( $elm !== self::NS_RDF . ' li' ) {
throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
* Also does some initial set up for the wrapper element
*
* @param $parser XMLParser
- * @param $elm String namespace "<space>" element
- * @param $attribs Array attribute name => value
+ * @param string $elm namespace "<space>" element
+ * @param array $attribs attribute name => value
* @throws MWException
*/
function startElement( $parser, $elm, $attribs ) {
if ( $elm === self::NS_RDF . ' RDF'
|| $elm === 'adobe:ns:meta/ xmpmeta'
- || $elm === 'adobe:ns:meta/ xapmeta')
+ || $elm === 'adobe:ns:meta/ xapmeta' )
{
/* ignore. */
return;
if ( count( $this->mode ) === 0 ) {
// This should not happen.
- throw new MWException('Error extracting XMP, '
+ throw new MWException( 'Error extracting XMP, '
. "encountered <$elm> with no mode" );
}
break;
default:
throw new MWException( 'StartElement in unknown mode: ' . $this->mode[0] );
- break;
}
}
* <rdf:Description rdf:about="" xmlns:exif="http://ns.adobe.com/exif/1.0/" exif:DigitalZoomRatio="0/10">
* @endcode
*
- * @param $attribs Array attribute=>value array.
+ * @param array $attribs attribute=>value array.
* @throws MWException
*/
private function doAttribs( $attribs ) {
$this->mode[0] = self::MODE_QDESC;
}
foreach ( $attribs as $name => $val ) {
-
-
if ( strpos( $name, ' ' ) === false ) {
// This shouldn't happen, but so far some old software forgets namespace
// on rdf:about.
}
/**
- * Given an extracted value, save it to results array
- *
- * note also uses $this->ancestorStruct and
- * $this->processingArray to determine what name to
- * save the value under. (in addition to $tag).
- *
- * @param $ns String namespace of tag this is for
- * @param $tag String tag name
- * @param $val String value to save
- */
+ * Given an extracted value, save it to results array
+ *
+ * note also uses $this->ancestorStruct and
+ * $this->processingArray to determine what name to
+ * save the value under. (in addition to $tag).
+ *
+ * @param string $ns namespace of tag this is for
+ * @param string $tag tag name
+ * @param string $val value to save
+ */
private function saveValue( $ns, $tag, $val ) {
$info =& $this->items[$ns][$tag];