3 * Creates a Google sitemap for the site
6 * @subpackage Maintenance
8 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
9 * @copyright Copyright © 2005, Jens Frank <jeluf@gmx.de>
10 * @copyright Copyright © 2005, Brion Vibber <brion@pobox.com>
12 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
13 * @link http://www.google.com/schemas/sitemap/0.84/sitemap.xsd
15 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
18 $optionsWithArgs = array( 'host', 'path' );
20 require_once 'commandLine.inc';
22 if ( ! isset( $options['host'] ) ) {
23 echo "Usage: php generateSitemap.php --host=hostname [--path=/pa/th/]\n";
26 $_SERVER['HOSTNAME'] = $options['host'];
// --path is optional (see the usage message), so guard the lookup instead of
// reading an undefined index; the constructor already maps null to ''.
29 $gs = new GenerateSitemap( $options['host'], isset( $options['path'] ) ? $options['path'] : null );
32 class GenerateSitemap
{
35 var $priorities = array(
36 // Custom main namespaces
38 // Custom talk namespaces
43 NS_USER_TALK
=> '0.1',
45 NS_PROJECT_TALK
=> '0.1',
47 NS_IMAGE_TALK
=> '0.1',
48 NS_MEDIAWIKI
=> '0.0',
49 NS_MEDIAWIKI_TALK
=> '0.1',
51 NS_TEMPLATE_TALK
=> '0.1',
53 NS_HELP_TALK
=> '0.1',
55 NS_CATEGORY_TALK
=> '0.1',
57 var $namespaces = array();
59 var $path, $file, $findex;
62 function GenerateSitemap( $host, $path ) {
65 $this->path
= isset( $path ) ?
$path : '';
66 $this->stderr
= fopen( 'php://stderr', 'wt' );
69 $this->dbr
=& wfGetDB( DB_SLAVE
);
70 $this->generateNamespaces();
71 $this->findex
= fopen( "{$this->path}sitemap-index-$wgDBname.xml", 'wb' );
74 function generateNamespaces() {
75 $fname = 'GenerateSitemap::generateNamespaces';
77 $res = $this->dbr
->select( 'page',
78 array( 'page_namespace' ),
82 'GROUP BY' => 'page_namespace',
83 'ORDER BY' => 'page_namespace',
87 while ( $row = $this->dbr
->fetchObject( $res ) )
88 $this->namespaces
[] = $row->page_namespace
;
91 function priority( $namespace ) {
92 return isset( $this->priorities
[$namespace] ) ?
$this->priorities
[$namespace] : $this->guessPriority( $namespace );
95 function guessPriority( $namespace ) {
96 return Namespace::isMain( $namespace ) ?
$this->priorities
[-2] : $this->priorities
[-1];
99 function getPageRes( $namespace ) {
100 $fname = 'GenerateSitemap::getPageRes';
102 return $this->dbr
->select( 'page',
109 array( 'page_namespace' => $namespace ),
117 fwrite( $this->findex
, $this->openIndex() );
119 foreach ( $this->namespaces
as $namespace ) {
120 $res = $this->getPageRes( $namespace );
124 $this->debug( $namespace );
125 while ( $row = $this->dbr
->fetchObject( $res ) ) {
126 if ( $i %
$this->cutoff
== 0 ) {
127 if ( $this->file
!== false ) {
128 gzwrite( $this->file
, $this->closeFile() );
129 gzclose( $this->file
);
131 $filename = "{$this->path}sitemap-$wgDBname-NS_$namespace-$smcount.xml.gz";
133 $this->file
= gzopen( $filename, 'wb' );
134 gzwrite( $this->file
, $this->openFile() );
135 fwrite( $this->findex
, $this->indexEntry( $filename ) );
136 $this->debug( "\t$filename" );
139 $title = Title
::makeTitle( $row->page_namespace
, $row->page_title
);
140 $date = $this->ISO8601( $row->page_touched
);
141 gzwrite( $this->file
, $this->fileEntry( $title->getFullURL(), $date, $this->priority( $namespace ) ) );
144 gzwrite( $this->file
, $this->closeFile() );
145 gzclose( $this->file
);
148 fwrite( $this->findex
, $this->closeIndex() );
149 fclose( $this->findex
);
153 return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
156 function xmlSchema() {
157 return 'http://www.google.com/schemas/sitemap/0.84';
160 function openIndex() {
161 return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
164 function indexEntry( $filename ) {
// The <loc> element must be closed with </loc>; a mismatched closing tag
// makes the sitemap index malformed XML.
169 "\t\t<loc>$wgServer/$filename</loc>\n" .
173 function closeIndex() {
174 return "</sitemapindex>\n";
177 function openFile() {
178 return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
181 function fileEntry( $url, $date, $priority ) {
184 "\t\t<loc>$url</loc>\n" .
185 "\t\t<lastmod>$date</lastmod>\n" .
186 "\t\t<priority>$priority</priority>\n" .
190 function closeFile() {
191 return "</urlset>\n";
194 function ISO8601( $timestamp ) {
195 return substr( wfTimestamp( TS_DB
, $timestamp ), 0, 4 +
1 +
2 +
1 +
2 );
198 function debug( $str ) {
199 fwrite( $this->stderr
, "$str\n" );