* Added support for a --path option to dump the output at a given path
[lhc/web/wiklou.git] / maintenance / generateSitemap.php
1 <?php
2 /**
3 * Creates a Google sitemap for the site
4 *
5 * @package MediaWiki
6 * @subpackage Maintenance
7 *
8 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
9 * @copyright Copyright © 2005, Jens Frank <jeluf@gmx.de>
10 * @copyright Copyright © 2005, Brion Vibber <brion@pobox.com>
11 *
12 * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
13 * @link http://www.google.com/schemas/sitemap/0.84/sitemap.xsd
14 *
15 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
16 */
17
// Command-line options that take a value; presumably consumed by
// commandLine.inc when it populates $options -- confirm against that file.
$optionsWithArgs = array( 'host', 'path' );
/* */
require_once 'commandLine.inc';

// --host is mandatory: print the usage line and stop when it is missing.
if ( ! isset( $options['host'] ) ) {
	echo "Usage: php generateSitemap.php --host=hostname [--path=/pa/th/]\n";
	exit(1);
} else {
	$_SERVER['HOSTNAME'] = $options['host'];
}

// --path is optional. Reading $options['path'] directly raised an
// undefined-index notice whenever it was omitted, so fall back to an empty
// prefix (files are then written relative to the current directory).
$sitemapPath = isset( $options['path'] ) ? $options['path'] : '';

$gs = new GenerateSitemap( $options['host'], $sitemapPath );
$gs->main();

31
/**
 * Generates gzipped sitemap files for every namespace that has rows in the
 * page table, plus one sitemap index file that lists the generated files.
 *
 * Each namespace gets its own series of files; a new file in the series is
 * started every $cutoff pages.
 */
class GenerateSitemap {
	/** Host name passed on the command line (stored, not otherwise used here) */
	var $host;

	/** Maximum number of <url> entries written to a single sitemap file */
	var $cutoff = 9000;

	/**
	 * Sitemap priority per namespace. The keys -2 and -1 are not real
	 * namespaces: guessPriority() uses them as fallbacks for namespaces
	 * that are not listed explicitly.
	 */
	var $priorities = array(
		// Custom main namespaces
		-2 => '0.5',
		// Custom talk namespaces
		-1 => '0.1',
		NS_MAIN => '1.0',
		NS_TALK => '0.1',
		NS_USER => '0.5',
		NS_USER_TALK => '0.1',
		NS_PROJECT => '0.5',
		NS_PROJECT_TALK => '0.1',
		NS_IMAGE => '0.5',
		NS_IMAGE_TALK => '0.1',
		NS_MEDIAWIKI => '0.0',
		NS_MEDIAWIKI_TALK => '0.1',
		NS_TEMPLATE => '0.0',
		NS_TEMPLATE_TALK => '0.1',
		NS_HELP => '0.5',
		NS_HELP_TALK => '0.1',
		NS_CATEGORY => '0.5',
		NS_CATEGORY_TALK => '0.1',
	);

	/** Namespace numbers found in the page table, filled by generateNamespaces() */
	var $namespaces = array();

	/** Database handle obtained from wfGetDB( DB_SLAVE ) */
	var $dbr;

	/**
	 * $path:   prefix prepended verbatim to every output file name
	 * $file:   gz handle of the sitemap file currently being written, or false
	 * $findex: handle of the sitemap index file
	 */
	var $path, $file, $findex;

	/** Handle on php://stderr used by debug() */
	var $stderr;

	/**
	 * Constructor: stores the settings, collects the namespaces to process
	 * and opens the sitemap index file for writing.
	 *
	 * @param string $host Host name
	 * @param string $path Output path prefix; it is concatenated directly with
	 *                     the file name, so a directory needs a trailing slash
	 */
	function GenerateSitemap( $host, $path ) {
		global $wgDBname;

		$this->path = isset( $path ) ? $path : '';
		$this->stderr = fopen( 'php://stderr', 'wt' );

		$this->host = $host;
		$this->dbr =& wfGetDB( DB_SLAVE );
		$this->generateNamespaces();
		$this->findex = fopen( "{$this->path}sitemap-index-$wgDBname.xml", 'wb' );
	}

	/**
	 * Fill $this->namespaces with the distinct page_namespace values of the
	 * page table, in ascending order.
	 */
	function generateNamespaces() {
		$fname = 'GenerateSitemap::generateNamespaces';

		$res = $this->dbr->select( 'page',
			array( 'page_namespace' ),
			array(),
			$fname,
			array(
				'GROUP BY' => 'page_namespace',
				'ORDER BY' => 'page_namespace',
			)
		);

		while ( $row = $this->dbr->fetchObject( $res ) ) {
			$this->namespaces[] = $row->page_namespace;
		}
	}

	/**
	 * Get the priority of a namespace: the configured value when there is
	 * one, otherwise the guess from guessPriority().
	 *
	 * @param int $namespace Namespace number
	 * @return string Priority value
	 */
	function priority( $namespace ) {
		return isset( $this->priorities[$namespace] )
			? $this->priorities[$namespace]
			: $this->guessPriority( $namespace );
	}

	/**
	 * Fallback priority for a namespace without an explicit entry: the -2
	 * entry when Namespace::isMain() says it is a main namespace, the -1
	 * entry otherwise.
	 *
	 * @param int $namespace Namespace number
	 * @return string Priority value
	 */
	function guessPriority( $namespace ) {
		return Namespace::isMain( $namespace ) ? $this->priorities[-2] : $this->priorities[-1];
	}

	/**
	 * Select namespace, title, redirect flag and touched timestamp of every
	 * page in the given namespace.
	 *
	 * @param int $namespace Namespace number
	 * @return mixed Result of the select(), to be read with fetchObject()
	 */
	function getPageRes( $namespace ) {
		$fname = 'GenerateSitemap::getPageRes';

		return $this->dbr->select( 'page',
			array(
				'page_namespace',
				'page_title',
				'page_is_redirect',
				'page_touched',
			),
			array( 'page_namespace' => $namespace ),
			$fname
		);
	}

	/**
	 * Write the sitemap files of every namespace and the sitemap index.
	 * Progress (namespace numbers and file names) is reported on stderr.
	 */
	function main() {
		global $wgDBname;

		fwrite( $this->findex, $this->openIndex() );

		foreach ( $this->namespaces as $namespace ) {
			$res = $this->getPageRes( $namespace );
			// No file is open yet for this namespace
			$this->file = false;
			// $i counts pages, $smcount counts files started in this namespace
			$i = $smcount = 0;

			$this->debug( $namespace );
			while ( $row = $this->dbr->fetchObject( $res ) ) {
				// Start a new file on the first page and then every $cutoff pages
				if ( $i % $this->cutoff == 0 ) {
					if ( $this->file !== false ) {
						// Finish the previous file of this namespace first
						gzwrite( $this->file, $this->closeFile() );
						gzclose( $this->file );
					}
					$filename = "{$this->path}sitemap-$wgDBname-NS_$namespace-$smcount.xml.gz";
					++$smcount;
					$this->file = gzopen( $filename, 'wb' );
					gzwrite( $this->file, $this->openFile() );
					fwrite( $this->findex, $this->indexEntry( $filename ) );
					$this->debug( "\t$filename" );
				}
				++$i;
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$date = $this->ISO8601( $row->page_touched );
				gzwrite( $this->file, $this->fileEntry( $title->getFullURL(), $date, $this->priority( $namespace ) ) );
			}
			// Finish the last file of the namespace, if any page was written
			if ( $this->file ) {
				gzwrite( $this->file, $this->closeFile() );
				gzclose( $this->file );
			}
		}
		fwrite( $this->findex, $this->closeIndex() );
		fclose( $this->findex );
	}

	/**
	 * @return string The XML declaration line
	 */
	function xmlHead() {
		return '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
	}

	/**
	 * @return string Namespace URI used on the root elements
	 */
	function xmlSchema() {
		return 'http://www.google.com/schemas/sitemap/0.84';
	}

	/**
	 * @return string XML declaration plus opening <sitemapindex> tag
	 */
	function openIndex() {
		return $this->xmlHead() . '<sitemapindex xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Build the <sitemap> element that points at one generated file.
	 *
	 * @param string $filename File name as written to disk, including the
	 *                         path prefix
	 * @return string XML fragment
	 */
	function indexEntry( $filename ) {
		global $wgServer;

		// The closing tag used to read </log>, which made the index file
		// malformed XML. The location is entity-escaped because it is
		// emitted as XML character data.
		return
			"\t<sitemap>\n" .
			"\t\t<loc>" . htmlspecialchars( "$wgServer/$filename" ) . "</loc>\n" .
			"\t</sitemap>\n";
	}

	/**
	 * @return string Closing </sitemapindex> tag
	 */
	function closeIndex() {
		return "</sitemapindex>\n";
	}

	/**
	 * @return string XML declaration plus opening <urlset> tag
	 */
	function openFile() {
		return $this->xmlHead() . '<urlset xmlns="' . $this->xmlSchema() . '">' . "\n";
	}

	/**
	 * Build the <url> element for one page.
	 *
	 * @param string $url Full, unescaped URL of the page
	 * @param string $date Last modification date
	 * @param string $priority Priority value
	 * @return string XML fragment
	 */
	function fileEntry( $url, $date, $priority ) {
		// Escape the URL: characters such as & are not allowed verbatim in
		// XML character data.
		return
			"\t<url>\n" .
			"\t\t<loc>" . htmlspecialchars( $url ) . "</loc>\n" .
			"\t\t<lastmod>$date</lastmod>\n" .
			"\t\t<priority>$priority</priority>\n" .
			"\t</url>\n";
	}

	/**
	 * @return string Closing </urlset> tag
	 */
	function closeFile() {
		return "</urlset>\n";
	}

	/**
	 * Convert a timestamp to the date used in <lastmod>: the first
	 * 4 + 1 + 2 + 1 + 2 = 10 characters of its TS_DB form (presumably
	 * YYYY-MM-DD -- confirm against wfTimestamp()).
	 *
	 * @param string $timestamp Timestamp in a format wfTimestamp() accepts
	 * @return string Date part of the timestamp
	 */
	function ISO8601( $timestamp ) {
		return substr( wfTimestamp( TS_DB, $timestamp ), 0, 4 + 1 + 2 + 1 + 2 );
	}

	/**
	 * Write a line to stderr.
	 *
	 * @param string $str Message, a newline is appended
	 */
	function debug( $str ) {
		fwrite( $this->stderr, "$str\n" );
	}
}
202
203 ?>