Introduce ContentHandler::exportTransform()
author: daniel <daniel.kinzler@wikimedia.de>
Tue, 20 May 2014 17:21:49 +0000 (19:21 +0200)
committer: daniel <daniel.kinzler@wikimedia.de>
Tue, 20 May 2014 17:21:49 +0000 (19:21 +0200)
ContentHandler::exportTransform() allows content handlers to apply
transformations upon export, such as conversion of legacy formats or
filtering of internal data.

Note that the transformation is applied to serialized content, since
the exporter will generally not unserialize the content blob to an
intermediate form before writing it to the dump. Implementations
may choose to unserialize, then transform and re-serialize.

Bug: 65256
Change-Id: Ic55a8bd8bea13041000b176c7c02c7c5ced76f6d

includes/Export.php
includes/content/ContentHandler.php

index 4c71eb9..21fcd5a 100644 (file)
@@ -670,12 +670,30 @@ class XmlDumpWriter {
                        $out .= "      " . Xml::elementClean( 'comment', array(), strval( $row->rev_comment ) ) . "\n";
                }
 
+               if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
+                       $content_model = strval( $row->rev_content_model );
+               } else {
+                       // probably using $wgContentHandlerUseDB = false;
+                       $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+                       $content_model = ContentHandler::getDefaultModelFor( $title );
+               }
+
+               $content_handler = ContentHandler::getForModelID( $content_model );
+
+               if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
+                       $content_format = strval( $row->rev_content_format );
+               } else {
+                       // probably using $wgContentHandlerUseDB = false;
+                       $content_format = $content_handler->getDefaultFormat();
+               }
+
                $text = '';
                if ( isset( $row->rev_deleted ) && ( $row->rev_deleted & Revision::DELETED_TEXT ) ) {
                        $out .= "      " . Xml::element( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
                } elseif ( isset( $row->old_text ) ) {
                        // Raw text from the database may have invalid chars
                        $text = strval( Revision::getRevisionText( $row ) );
+                       $text = $content_handler->exportTransform( $text, $content_format );
                        $out .= "      " . Xml::elementClean( 'text',
                                array( 'xml:space' => 'preserve', 'bytes' => intval( $row->rev_len ) ),
                                strval( $text ) ) . "\n";
@@ -695,26 +713,7 @@ class XmlDumpWriter {
                        $out .= "      <sha1/>\n";
                }
 
-               if ( isset( $row->rev_content_model ) && !is_null( $row->rev_content_model ) ) {
-                       $content_model = strval( $row->rev_content_model );
-               } else {
-                       // probably using $wgContentHandlerUseDB = false;
-                       // @todo test!
-                       $title = Title::makeTitle( $row->page_namespace, $row->page_title );
-                       $content_model = ContentHandler::getDefaultModelFor( $title );
-               }
-
                $out .= "      " . Xml::element( 'model', null, strval( $content_model ) ) . "\n";
-
-               if ( isset( $row->rev_content_format ) && !is_null( $row->rev_content_format ) ) {
-                       $content_format = strval( $row->rev_content_format );
-               } else {
-                       // probably using $wgContentHandlerUseDB = false;
-                       // @todo test!
-                       $content_handler = ContentHandler::getForModelID( $content_model );
-                       $content_format = $content_handler->getDefaultFormat();
-               }
-
                $out .= "      " . Xml::element( 'format', null, strval( $content_format ) ) . "\n";
 
                wfRunHooks( 'XmlDumpWriterWriteRevision', array( &$this, &$out, $row, $text ) );
index dd7e27d..6bb69c1 100644 (file)
@@ -430,6 +430,20 @@ abstract class ContentHandler {
         */
        abstract public function serializeContent( Content $content, $format = null );
 
+       /**
+        * Applies transformations on export (returns the blob unchanged per default).
+        * Subclasses may override this to perform transformations such as conversion
+        * of legacy formats or filtering of internal meta-data.
+        *
+        * @param string $blob The blob to be exported
+        * @param string|null $format The blob's serialization format
+        *
+        * @return string
+        */
+       public function exportTransform( $blob, $format = null ) {
+               return $blob;
+       }
+
        /**
         * Unserializes a Content object of the type supported by this ContentHandler.
         *