Merge "Handle missing namespace prefix in XML dumps more gracefully"
[lhc/web/wiklou.git] / maintenance / populateInterwiki.php
1 <?php
2
3 /**
4 * Maintenance script that populates the interwiki table with list of sites from
5 * a source wiki, such as English Wikipedia. (the default source)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @file
23 * @ingroup Maintenance
24 * @author Katie Filbert < aude.wiki@gmail.com >
25 */
26
27 require_once __DIR__ . '/Maintenance.php';
28
/**
 * Maintenance script that fills the local interwiki table with the interwiki
 * prefixes reported by the API of a source wiki.
 *
 * The source API endpoint is taken from the --source option and defaults to
 * https://en.wikipedia.org/w/api.php. A completed run is recorded in the
 * updatelog table under the key "populate interwiki"; later runs stop early
 * unless --force is given.
 *
 * @ingroup Maintenance
 */
class PopulateInterwiki extends Maintenance {

	/**
	 * @var string URL of the source wiki's api.php; assigned in execute()
	 *  from the --source option.
	 */
	private $source;

	/**
	 * Registers the script description and the --source / --force options.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( <<<TEXT
This script will populate the interwiki table, pulling in interwiki links that are used on Wikipedia
or another MediaWiki wiki.

When the script has finished, it will make a note of this in the database, and will not run again
without the --force option.

--source parameter is the url for the source wiki api, such as "https://en.wikipedia.org/w/api.php"
(the default) from which the script fetches the interwiki data and uses here to populate
the interwiki database table.
TEXT
		);

		$this->addOption( 'source', 'Source wiki for interwiki table, such as '
			. 'https://en.wikipedia.org/w/api.php (the default)', false, true );
		$this->addOption( 'force', 'Run regardless of whether the database says it has '
			. 'been run already.' );
	}

	/**
	 * Entry point: reads the options, fetches the interwiki map from the
	 * source wiki and, when the fetch succeeded, writes it to the database.
	 */
	public function execute() {
		$force = $this->getOption( 'force', false );
		$this->source = $this->getOption( 'source', 'https://en.wikipedia.org/w/api.php' );

		$data = $this->fetchLinks();

		if ( $data === false ) {
			$this->error( "Error during fetching data." );
		} else {
			$this->doPopulate( $data, $force );
		}
	}

	/**
	 * Requests the local interwiki map (siteinfo / interwikimap, filtered to
	 * local entries) from the source wiki API as JSON and decodes it.
	 *
	 * @return array[]|bool The 'interwikimap' sub-array or false on failure.
	 */
	protected function fetchLinks() {
		$url = wfArrayToCgi( [
			'action' => 'query',
			'meta' => 'siteinfo',
			'siprop' => 'interwikimap',
			'sifilteriw' => 'local',
			'format' => 'json'
		] );

		if ( !empty( $this->source ) ) {
			// Tolerate a source given with a trailing "?" so the query string
			// is always joined with exactly one separator.
			$url = rtrim( $this->source, '?' ) . '?' . $url;
		}

		$json = Http::get( $url );

		if ( !is_string( $json ) ) {
			// No response body to decode; report the fetch as failed.
			return false;
		}

		$data = json_decode( $json, true );

		// A decoded array is not enough: a response without the
		// query/interwikimap path (for example an API error document) must be
		// reported as a failure instead of leaking null to the caller, which
		// only tests for false before passing the result to doPopulate().
		if (
			!is_array( $data )
			|| !isset( $data['query']['interwikimap'] )
			|| !is_array( $data['query']['interwikimap'] )
		) {
			return false;
		}

		return $data['query']['interwikimap'];
	}

	/**
	 * Inserts every fetched interwiki prefix that is not yet present in the
	 * interwiki table, invalidates the interwiki cache for each processed
	 * prefix and records the finished run in the updatelog table.
	 *
	 * @param array[] $data Interwiki map entries; each entry is expected to
	 *  carry a 'prefix' and a 'url' key. Entries lacking either are skipped.
	 * @param bool $force When true, run even if updatelog already holds the
	 *  "populate interwiki" marker.
	 *
	 * @return bool Always true.
	 */
	protected function doPopulate( array $data, $force ) {
		$dbw = wfGetDB( DB_MASTER );

		if ( !$force ) {
			$row = $dbw->selectRow(
				'updatelog',
				'1',
				[ 'ul_key' => 'populate interwiki' ],
				__METHOD__
			);

			if ( $row ) {
				$this->output( "Interwiki table already populated. Use php " .
					"maintenance/populateInterwiki.php\n--force from the command line " .
					"to override.\n" );
				return true;
			}
		}

		foreach ( $data as $d ) {
			if ( !isset( $d['prefix'], $d['url'] ) ) {
				// Incomplete entry: nothing usable to store.
				continue;
			}

			$prefix = $d['prefix'];

			$row = $dbw->selectRow(
				'interwiki',
				'1',
				[ 'iw_prefix' => $prefix ],
				__METHOD__
			);

			if ( !$row ) {
				$dbw->insert(
					'interwiki',
					[
						'iw_prefix' => $prefix,
						'iw_url' => $d['url'],
						// fetchLinks() asks only for local entries
						// (sifilteriw=local), so all rows are stored as local.
						'iw_local' => 1
					],
					__METHOD__,
					'IGNORE'
				);
			}

			// Invalidated for every prefix, whether or not a row was inserted.
			Interwiki::invalidateCache( $prefix );
		}

		// Leave the marker that the check at the top of this method looks for,
		// so a later run without --force stops early. IGNORE keeps a forced
		// re-run from failing on the already existing marker row.
		$dbw->insert(
			'updatelog',
			[ 'ul_key' => 'populate interwiki' ],
			__METHOD__,
			'IGNORE'
		);

		$this->output( "Interwiki links are populated.\n" );

		return true;
	}

}
154
// Publish this script's class name, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that file reads $maintClass to instantiate and run
// the script when this file is the command-line entry point — confirm against
// Maintenance.php.
155 $maintClass = PopulateInterwiki::class;
156 require_once RUN_MAINTENANCE_IF_MAIN;