public function getCategoryIterator( IDatabase $dbr ) {
$it = new BatchRowIterator(
$dbr,
- 'page',
+ [ 'page', 'page_props', 'category' ],
[ 'page_title' ],
- $this->mBatchSize
+ $this->getBatchSize()
);
$it->addConditions( [
'page_namespace' => NS_CATEGORY,
] );
- $it->setFetchColumns( [ 'page_title', 'page_id' ] );
+ $it->setFetchColumns( [
+ 'page_title',
+ 'page_id',
+ 'pp_propname',
+ 'cat_pages',
+ 'cat_subcats',
+ 'cat_files'
+ ] );
+ $it->addJoinConditions(
+ [
+ 'page_props' => [
+ 'LEFT JOIN', [ 'pp_propname' => 'hiddencat', 'pp_page = page_id' ]
+ ],
+ 'category' => [
+ 'LEFT JOIN', [ 'cat_title = page_title' ]
+ ]
+ ]
+
+ );
return $it;
}
/**
* Get iterator for links for categories.
* @param IDatabase $dbr
- * @param array $ids List of page IDs
+ * @param int[] $ids List of page IDs
* @return Traversable
*/
public function getCategoryLinksIterator( IDatabase $dbr, array $ids ) {
$dbr,
'categorylinks',
[ 'cl_from', 'cl_to' ],
- $this->mBatchSize
+ $this->getBatchSize()
);
$it->addConditions( [
'cl_type' => 'subcat',
return new RecursiveIteratorIterator( $it );
}
+ /**
+ * @param int $timestamp
+ */
public function addDumpHeader( $timestamp ) {
global $wgRightsUrl;
$licenseUrl = $wgRightsUrl;
if ( substr( $licenseUrl, 0, 2 ) == '//' ) {
$licenseUrl = 'https:' . $licenseUrl;
}
- $this->rdfWriter->about( wfExpandUrl( '/categoriesDump', PROTO_CANONICAL ) )
+ $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
->a( 'schema', 'Dataset' )
->a( 'owl', 'Ontology' )
->say( 'cc', 'license' )->is( $licenseUrl )
foreach ( $this->getCategoryIterator( $dbr ) as $batch ) {
$pages = [];
foreach ( $batch as $row ) {
- $this->categoriesRdf->writeCategoryData( $row->page_title );
+ $this->categoriesRdf->writeCategoryData(
+ $row->page_title,
+ $row->pp_propname === 'hiddencat',
+ (int)$row->cat_pages - (int)$row->cat_subcats - (int)$row->cat_files,
+ (int)$row->cat_subcats
+ );
$pages[$row->page_id] = $row->page_title;
}
}
}
-$maintClass = "DumpCategoriesAsRdf";
+$maintClass = DumpCategoriesAsRdf::class;
require_once RUN_MAINTENANCE_IF_MAIN;