Merge "Make Special:Contributions use OOUI"
[lhc/web/wiklou.git] / maintenance / categoryChangesAsRdf.php
index d613352..95a59d2 100644 (file)
@@ -46,6 +46,7 @@ SPARQL;
 DELETE {
 ?category ?x ?y
 } WHERE {
+   ?category ?x ?y
    VALUES ?category {
      %s
    }
@@ -62,6 +63,7 @@ DELETE {
 } INSERT {
 %s
 } WHERE {
+  ?category ?x ?y
    VALUES ?category {
      %s
    }
@@ -113,19 +115,18 @@ SPARQLDI;
        }
 
        public function execute() {
-               global $wgRCMaxAge;
-
                $this->initialize();
                $startTS = new MWTimestamp( $this->getOption( "start" ) );
 
                $endTS = new MWTimestamp( $this->getOption( "end" ) );
                $now = new MWTimestamp();
+               $rcMaxAge = $this->getConfig()->get( 'RCMaxAge' );
 
-               if ( $now->getTimestamp() - $startTS->getTimestamp() > $wgRCMaxAge ) {
-                       $this->error( "Start timestamp too old, maximum RC age is $wgRCMaxAge!" );
+               if ( $now->getTimestamp() - $startTS->getTimestamp() > $rcMaxAge ) {
+                       $this->error( "Start timestamp too old, maximum RC age is $rcMaxAge!" );
                }
-               if ( $now->getTimestamp() - $endTS->getTimestamp() > $wgRCMaxAge ) {
-                       $this->error( "End timestamp too old, maximum RC age is $wgRCMaxAge!" );
+               if ( $now->getTimestamp() - $endTS->getTimestamp() > $rcMaxAge ) {
+                       $this->error( "End timestamp too old, maximum RC age is $rcMaxAge!" );
                }
 
                $this->startTS = $startTS->getTimestamp();
@@ -157,8 +158,12 @@ SPARQLDI;
                $this->handleMoves( $dbr, $output );
                // We need to handle restores too since delete may have happened in previous update.
                $this->handleRestores( $dbr, $output );
+               // Process newly added pages
                $this->handleAdds( $dbr, $output );
-               $this->handleChanges( $dbr, $output );
+               // Process page edits
+               $this->handleEdits( $dbr, $output );
+               // Process categorization changes
+               $this->handleCategorization( $dbr, $output );
 
                // Update timestamp
                fwrite( $output, $this->updateTS( $this->endTS ) );
@@ -198,9 +203,10 @@ SPARQLDI;
        }
 
        /**
-        * Write data for a set of categories
+        * Write parent data for a set of categories.
+        * The list has the child categories.
         * @param IDatabase $dbr
-        * @param string[] $pages List of categories: id => title
+        * @param string[] $pages List of child categories: id => title
         */
        private function writeParentCategories( IDatabase $dbr, $pages ) {
                foreach ( $this->getCategoryLinksIterator( $dbr, array_keys( $pages ) ) as $row ) {
@@ -302,7 +308,7 @@ SPARQL;
                        'rc_type' => RC_LOG,
                ] );
                $it->addJoinConditions( [
-                       'page' => [ 'INNER JOIN', 'rc_cur_id = page_id' ],
+                       'page' => [ 'JOIN', 'rc_cur_id = page_id' ],
                ] );
                $this->addIndex( $it );
                return $it;
@@ -356,16 +362,17 @@ SPARQL;
        }
 
        /**
-        * Fetch categorization changes
+        * Fetch categorization changes or edits
         * @param IDatabase $dbr
         * @return BatchRowIterator
         */
-       protected function getChangedCatsIterator( IDatabase $dbr ) {
-               $it = $this->setupChangesIterator( $dbr );
+       protected function getChangedCatsIterator( IDatabase $dbr, $type ) {
+               $it =
+                       $this->setupChangesIterator( $dbr );
                $it->addConditions( [
                        'rc_namespace' => NS_CATEGORY,
                        'rc_new' => 0,
-                       'rc_type' => [ RC_EDIT, RC_CATEGORIZE ],
+                       'rc_type' => $type,
                ] );
                $this->addIndex( $it );
                return $it;
@@ -396,7 +403,7 @@ SPARQL;
        /**
         * Get iterator for links for categories.
         * @param IDatabase $dbr
-        * @param array $ids List of page IDs
+        * @param int[] $ids List of page IDs
         * @return Traversable
         */
        protected function getCategoryLinksIterator( IDatabase $dbr, array $ids ) {
@@ -540,14 +547,21 @@ SPARQL;
        }
 
        /**
+        * Handle edits for category texts
         * @param IDatabase $dbr
         * @param resource $output
         */
-       public function handleChanges( IDatabase $dbr, $output ) {
-               foreach ( $this->getChangedCatsIterator( $dbr ) as $batch ) {
+       public function handleEdits( IDatabase $dbr, $output ) {
+               // Editing category can change hidden flag and add new parents.
+               // TODO: it's pretty expensive to update all edited categories, and most edits
+               // aren't actually interesting for us. Some way to know which are interesting?
+               // We can capture recategorization on the next step, but not change in hidden status.
+               foreach ( $this->getChangedCatsIterator( $dbr, RC_EDIT ) as $batch ) {
                        $pages = [];
                        $deleteUrls = [];
                        foreach ( $batch as $row ) {
+                               // Note that on categorization event, cur_id points to
+                               // the child page, not the parent category!
                                if ( isset( $this->processed[$row->rc_cur_id] ) ) {
                                        // We already captured this one before
                                        continue;
@@ -558,6 +572,123 @@ SPARQL;
                                $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';
                        }
 
+                       fwrite( $output, $this->getCategoriesUpdate( $dbr, $deleteUrls, $pages, "Edits" ) );
+               }
+       }
+
+       /**
+        * Handles categorization changes
+        * @param IDatabase $dbr
+        * @param resource $output
+        */
+       public function handleCategorization( IDatabase $dbr, $output ) {
+               $processedTitle = [];
+               // Categorization change can add new parents and change counts
+               // for the parent category.
+               foreach ( $this->getChangedCatsIterator( $dbr, RC_CATEGORIZE ) as $batch ) {
+                       /*
+                        * Note that on categorization event, cur_id points to
+                        * the child page, not the parent category!
+                        * So we need to have a two-stage process, since we have ID from one
+                        * category and title from another, and we need both for proper updates.
+                        * TODO: For now, we do full update even though some data hasn't changed,
+                        * e.g. parents for parent cat and counts for child cat.
+                        */
+                       $childPages = [];
+                       $parentCats = [];
+                       foreach ( $batch as $row ) {
+                               $childPages[$row->rc_cur_id] = true;
+                               $parentCats[$row->rc_title] = true;
+                       }
+
+                       $joinConditions = [
+                               'page_props' => [
+                                       'LEFT JOIN',
+                                       [ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ],
+                               ],
+                               'category' => [
+                                       'LEFT JOIN',
+                                       [ 'cat_title = page_title' ],
+                               ],
+                       ];
+
+                       $pages = [];
+                       $deleteUrls = [];
+
+                       if ( $childPages ) {
+                               // Load child rows by ID
+                               $childRows = $dbr->select(
+                                       [ 'page', 'page_props', 'category' ],
+                                       [
+                                               'page_id',
+                                               'rc_title' => 'page_title',
+                                               'pp_propname',
+                                               'cat_pages',
+                                               'cat_subcats',
+                                               'cat_files',
+                                       ],
+                                       [ 'page_namespace' => NS_CATEGORY, 'page_id' => array_keys( $childPages ) ],
+                                       __METHOD__,
+                                       [],
+                                       $joinConditions
+                               );
+                               foreach ( $childRows as $row ) {
+                                       if ( isset( $this->processed[$row->page_id] ) ) {
+                                               // We already captured this one before
+                                               continue;
+                                       }
+                                       $this->writeCategoryData( $row );
+                                       $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';
+                                       $this->processed[$row->page_id] = true;
+                               }
+                       }
+
+                       if ( $parentCats ) {
+                               // Load parent rows by title
+                               $joinConditions = [
+                                       'page' => [
+                                               'LEFT JOIN',
+                                               [ 'page_title = cat_title', 'page_namespace' => NS_CATEGORY ],
+                                       ],
+                                       'page_props' => [
+                                               'LEFT JOIN',
+                                               [ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ],
+                                       ],
+                               ];
+
+                               $parentRows = $dbr->select(
+                                       [ 'category', 'page', 'page_props' ],
+                                       [
+                                               'page_id',
+                                               'rc_title' => 'cat_title',
+                                               'pp_propname',
+                                               'cat_pages',
+                                               'cat_subcats',
+                                               'cat_files',
+                                       ],
+                                       [ 'cat_title' => array_keys( $parentCats ) ],
+                                       __METHOD__,
+                                       [],
+                                       $joinConditions
+                               );
+                               foreach ( $parentRows as $row ) {
+                                       if ( $row->page_id && isset( $this->processed[$row->page_id] ) ) {
+                                               // We already captured this one before
+                                               continue;
+                                       }
+                                       if ( isset( $processedTitle[$row->rc_title] ) ) {
+                                               // We already captured this one before
+                                               continue;
+                                       }
+                                       $this->writeCategoryData( $row );
+                                       $deleteUrls[] = '<' . $this->categoriesRdf->labelToUrl( $row->rc_title ) . '>';
+                                       if ( $row->page_id ) {
+                                               $this->processed[$row->page_id] = true;
+                                       }
+                                       $processedTitle[$row->rc_title] = true;
+                               }
+                       }
+
                        fwrite( $output, $this->getCategoriesUpdate( $dbr, $deleteUrls, $pages, "Changes" ) );
                }
        }