Merge "Port categories dump header fix"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Wed, 17 Jan 2018 00:19:42 +0000 (00:19 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Wed, 17 Jan 2018 00:19:43 +0000 (00:19 +0000)
includes/CategoriesRdf.php
maintenance/dumpCategoriesAsRdf.php
tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
tests/phpunit/maintenance/categoriesRdfTest.php

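diff --git a/includes/CategoriesRdf.php b/includes/CategoriesRdf.php
index e19dc2a..463f6e8 100644
--- a/includes/CategoriesRdf.php
+++ b/includes/CategoriesRdf.php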
@@ -37,7 +37,13 @@ class CategoriesRdf {
        /**
         * Current version of the dump format.
         */
-       const FORMAT_VERSION = "1.0";
+       const FORMAT_VERSION = "1.1";
+       /**
+        * Special page for Dump identification.
+        * Used as head URI for each wiki's category dump, e.g.:
+        * https://en.wikipedia.org/wiki/Special:CategoryDump
+        */
+       const SPECIAL_DUMP = 'Special:CategoryDump';
        /**
         * @var RdfWriter
         */
@@ -84,12 +90,30 @@ class CategoriesRdf {
                $this->rdfWriter->say( 'rdfs', 'label' )->value( $titletext );
        }
 
+       /**
+        * Make URL from title label
+        * @param string $titleLabel Short label (without namespace) of the category
+        * @return string URL for the category
+        */
+       public function labelToUrl( $titleLabel ) {
+               return $this->titleToUrl( Title::makeTitle( NS_CATEGORY, $titleLabel ) );
+       }
+
        /**
         * Convert Title to link to target page.
         * @param Title $title
-        * @return string
+        * @return string URL for the category
         */
        private function titleToUrl( Title $title ) {
                return $title->getFullURL( '', false, PROTO_CANONICAL );
        }
+
+       /**
+        * Get URI of the dump for this particular wiki.
+        * @return false|string
+        */
+       public function getDumpURI() {
+               return $this->titleToUrl( Title::makeTitle( NS_MAIN, self::SPECIAL_DUMP ) );
+       }
+
 }
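diff --git a/maintenance/dumpCategoriesAsRdf.php b/maintenance/dumpCategoriesAsRdf.php
index 282a04b..c1835d0 100644
--- a/maintenance/dumpCategoriesAsRdf.php
+++ b/maintenance/dumpCategoriesAsRdf.php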
@@ -96,7 +96,7 @@ class DumpCategoriesAsRdf extends Maintenance {
                if ( substr( $licenseUrl, 0, 2 ) == '//' ) {
                        $licenseUrl = 'https:' . $licenseUrl;
                }
-               $this->rdfWriter->about( wfExpandUrl( '/categoriesDump', PROTO_CANONICAL ) )
+               $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
                        ->a( 'schema', 'Dataset' )
                        ->a( 'owl', 'Ontology' )
                        ->say( 'cc', 'license' )->is( $licenseUrl )
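diff --git a/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt b/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
index d2d7ea8..b8bd8e0 100644
--- a/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
+++ b/tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt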
@@ -1,10 +1,10 @@
-<http://acme.test/categoriesDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Dataset> .
-<http://acme.test/categoriesDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Ontology> .
-<http://acme.test/categoriesDump> <http://creativecommons.org/ns#license> <https://creativecommons.org/licenses/by-sa/3.0/> .
-<http://acme.test/categoriesDump> <http://schema.org/softwareVersion> "1.0" .
-<http://acme.test/categoriesDump> <http://schema.org/dateModified> "{DATE}"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
-<http://acme.test/categoriesDump> <http://schema.org/isPartOf> <http://acme.test/> .
-<http://acme.test/categoriesDump> <http://www.w3.org/2002/07/owl#imports> <https://www.mediawiki.org/ontology/ontology.owl> .
+<http://acme.test/wiki/Special:CategoryDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Dataset> .
+<http://acme.test/wiki/Special:CategoryDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Ontology> .
+<http://acme.test/wiki/Special:CategoryDump> <http://creativecommons.org/ns#license> <https://creativecommons.org/licenses/by-sa/3.0/> .
+<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/softwareVersion> "1.1" .
+<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/dateModified> "{DATE}"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
+<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/isPartOf> <http://acme.test/> .
+<http://acme.test/wiki/Special:CategoryDump> <http://www.w3.org/2002/07/owl#imports> <https://www.mediawiki.org/ontology/ontology.owl> .
 <http://acme.test/wiki/Category:Category_One> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.mediawiki.org/ontology#Category> .
 <http://acme.test/wiki/Category:Category_One> <http://www.w3.org/2000/01/rdf-schema#label> "Category One" .
 <http://acme.test/wiki/Category:2_Category_Two> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.mediawiki.org/ontology#Category> .
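diff --git a/tests/phpunit/maintenance/categoriesRdfTest.php b/tests/phpunit/maintenance/categoriesRdfTest.php
index b51c14c..2edbae1 100644
--- a/tests/phpunit/maintenance/categoriesRdfTest.php
+++ b/tests/phpunit/maintenance/categoriesRdfTest.php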
@@ -64,8 +64,8 @@ class CategoriesRdfTest extends MediaWikiLangTestCase {
                $dumpScript->execute();
                $actualOut = file_get_contents( $outFileName );
                $actualOut = preg_replace(
-                       '|<http://acme.test/categoriesDump> <http://schema.org/dateModified> "[^"]+?"|',
-                       '<http://acme.test/categoriesDump> <http://schema.org/dateModified> "{DATE}"',
+                       '|<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/dateModified> "[^"]+?"|',
+                       '<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/dateModified> "{DATE}"',
                        $actualOut
                );