Introduce ContentHandler::importTransform.
author daniel <daniel.kinzler@wikimedia.de>
Tue, 20 May 2014 17:09:51 +0000 (19:09 +0200)
committer daniel <daniel.kinzler@wikimedia.de>
Tue, 20 May 2014 17:12:35 +0000 (19:12 +0200)
ContentHandler::importTransform allows ContentHandler
implementations to apply transformations on page content
upon import. Such transformations may be useful to update
from legacy formats, apply ID rewriting, etc.

Note that the transformation is done on the serialized content.
This allows for a "raw" import implementation that writes
imported blobs directly into a blob store without unserializing
them into an intermediary representation. Implementations may
choose to unserialize, transform, and then re-serialize.

Bug: 65256
Change-Id: I290fdf5589af43def8b3eddb68b5e1c23f6124e8

includes/Import.php
includes/content/ContentHandler.php

index 743037c..aa55290 100644 (file)
@@ -263,7 +263,7 @@ class WikiImporter {
         * @return bool
         */
        public function importRevision( $revision ) {
         * @return bool
         */
        public function importRevision( $revision ) {
-               if ( !$revision->getContent()->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
+               if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
                        $this->notice( 'import-error-bad-location',
                                $revision->getTitle()->getPrefixedText(),
                                $revision->getID(),
                        $this->notice( 'import-error-bad-location',
                                $revision->getTitle()->getPrefixedText(),
                                $revision->getID(),
@@ -694,9 +694,6 @@ class WikiImporter {
                if ( isset( $revisionInfo['id'] ) ) {
                        $revision->setID( $revisionInfo['id'] );
                }
                if ( isset( $revisionInfo['id'] ) ) {
                        $revision->setID( $revisionInfo['id'] );
                }
-               if ( isset( $revisionInfo['text'] ) ) {
-                       $revision->setText( $revisionInfo['text'] );
-               }
                if ( isset( $revisionInfo['model'] ) ) {
                        $revision->setModel( $revisionInfo['model'] );
                }
                if ( isset( $revisionInfo['model'] ) ) {
                        $revision->setModel( $revisionInfo['model'] );
                }
@@ -705,6 +702,14 @@ class WikiImporter {
                }
                $revision->setTitle( $pageInfo['_title'] );
 
                }
                $revision->setTitle( $pageInfo['_title'] );
 
+               if ( isset( $revisionInfo['text'] ) ) {
+                       $handler = $revision->getContentHandler();
+                       $text = $handler->importTransform(
+                               $revisionInfo['text'],
+                               $revision->getFormat() );
+
+                       $revision->setText( $text );
+               }
                if ( isset( $revisionInfo['timestamp'] ) ) {
                        $revision->setTimestamp( $revisionInfo['timestamp'] );
                } else {
                if ( isset( $revisionInfo['timestamp'] ) ) {
                        $revision->setTimestamp( $revisionInfo['timestamp'] );
                } else {
@@ -1087,6 +1092,9 @@ class WikiRevision {
        /** @var Content */
        protected $content = null;
 
        /** @var Content */
        protected $content = null;
 
+       /** @var ContentHandler */
+       protected $contentHandler = null;
+
        /** @var string */
        public $comment = "";
 
        /** @var string */
        public $comment = "";
 
@@ -1318,18 +1326,24 @@ class WikiRevision {
                return $this->text;
        }
 
                return $this->text;
        }
 
+       /**
+        * @return ContentHandler
+        */
+       function getContentHandler() {
+               if ( is_null( $this->contentHandler ) ) {
+                       $this->contentHandler = ContentHandler::getForModelID( $this->getModel() );
+               }
+
+               return $this->contentHandler;
+       }
+
        /**
         * @return Content
         */
        function getContent() {
                if ( is_null( $this->content ) ) {
        /**
         * @return Content
         */
        function getContent() {
                if ( is_null( $this->content ) ) {
-                       $this->content =
-                               ContentHandler::makeContent(
-                                       $this->text,
-                                       $this->getTitle(),
-                                       $this->getModel(),
-                                       $this->getFormat()
-                               );
+                       $handler = $this->getContentHandler();
+                       $this->content = $handler->unserializeContent( $this->text, $this->getFormat() );
                }
 
                return $this->content;
                }
 
                return $this->content;
@@ -1350,8 +1364,8 @@ class WikiRevision {
         * @return string
         */
        function getFormat() {
         * @return string
         */
        function getFormat() {
-               if ( is_null( $this->model ) ) {
-                       $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat();
+               if ( is_null( $this->format ) ) {
+                       $this->format = $this->getContentHandler()->getDefaultFormat();
                }
 
                return $this->format;
                }
 
                return $this->format;
index dd7e27d..212dfd1 100644 (file)
@@ -442,6 +442,21 @@ abstract class ContentHandler {
         */
        abstract public function unserializeContent( $blob, $format = null );
 
         */
        abstract public function unserializeContent( $blob, $format = null );
 
+       /**
+        * Apply import transformation (per default, returns $blob unchanged).
+        * This gives subclasses an opportunity to transform data blobs on import.
+        *
+        * @since 1.24
+        *
+        * @param string $blob
+        * @param string|null $format
+        *
+        * @return string
+        */
+       public function importTransform( $blob, $format = null ) {
+               return $blob;
+       }
+
        /**
         * Creates an empty Content object of the type supported by this
         * ContentHandler.
        /**
         * Creates an empty Content object of the type supported by this
         * ContentHandler.