Merge "Title: Title::getSubpage should not lose the interwiki prefix"
[lhc/web/wiklou.git] / includes / CategoryFinder.php
1 <?php
2 /**
3 * Recent changes filtering by category.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Wikimedia\Rdbms\IDatabase;
24
/**
 * The "CategoryFinder" class takes a list of articles, creates an internal
 * representation of all their parent categories (as well as parents of
 * parents etc.). From this representation, it determines which of these
 * articles are in one or all of a given subset of categories.
 *
 * Example use :
 * @code
 *     # Determines whether the article with the page_id 12345 is in both
 *     # "Category 1" and "Category 2" or their subcategories, respectively
 *
 *     $cf = new CategoryFinder;
 *     $cf->seed(
 *         [ 12345 ],
 *         [ 'Category 1', 'Category 2' ],
 *         'AND'
 *     );
 *     $a = $cf->run();
 *     print implode( ',' , $a );
 * @endcode
 *
 * @deprecated since 1.31
 */
class CategoryFinder {
	/** @var int[] The original article IDs passed to the seed function */
	protected $articles = [];

	/**
	 * @var array DBKEY names of categories that don't have a page,
	 *  stored as [ name => name ] so membership is a key lookup
	 */
	protected $deadend = [];

	/** @var array Array of [ cl_from ID => [ cl_to name => categorylinks row ] ] */
	protected $parents = [];

	/** @var array Article/category IDs whose parents get fetched by the next scan */
	protected $next = [];

	/** @var int Max layer depth */
	protected $maxdepth = -1;

	/** @var array Target categories in DBKEY form, stored as [ name => name ] */
	protected $targets = [];

	/** @var array Map of category DBKEY name => page ID of that category's page */
	protected $name2id = [];

	/** @var string "AND" or "OR" */
	protected $mode;

	/** @var IDatabase Read-DB replica DB */
	protected $dbr;

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * Any parent/category data gathered by an earlier seed()/run() cycle on
	 * this instance is discarded, so each run only sees what it scanned itself.
	 *
	 * @param array $articleIds Array of article IDs
	 * @param array $categories Names of the target categories. Each one is
	 *   converted to DBKEY form through Title::makeTitleSafe(); names for which
	 *   no title can be made are silently dropped.
	 * @param string $mode 'OR' to accept an article as soon as one target
	 *   category is found among its ancestors; any other value is handled as
	 *   'AND' (every target category has to be found). Default 'AND'.
	 * @param int $maxdepth Maximum layer depth. Where:
	 *   -1 means deep recursion (default);
	 *   0 means only the categories the articles are directly in are scanned;
	 *   1 means one additional parent layer is scanned, etc.
	 */
	public function seed( $articleIds, $categories, $mode = 'AND', $maxdepth = -1 ) {
		$this->articles = $articleIds;
		$this->next = $articleIds;
		$this->mode = $mode;
		$this->maxdepth = $maxdepth;

		# Forget the previous traversal. Keeping it would let check() walk a
		# tree that is deeper than the $maxdepth requested for this run.
		$this->parents = [];
		$this->name2id = [];
		$this->deadend = [];

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = [];
		foreach ( $categories as $category ) {
			$title = Title::makeTitleSafe( NS_CATEGORY, $category );
			if ( $title ) {
				$dbKey = $title->getDBkey();
				$this->targets[$dbKey] = $dbKey;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 * @return array Array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_REPLICA );

		$depth = 0;
		$dig = true;
		while ( $dig && count( $this->next ) ) {
			$this->scanNextLayer();

			// Is there any depth limit?
			if ( $this->maxdepth !== -1 ) {
				// The layer just scanned counts as layer $depth; stop once the
				// limit has been reached.
				$dig = $depth < $this->maxdepth;
				$depth++;
			}
		}

		# Now check if this applies to the individual articles
		$matching = [];
		foreach ( $this->articles as $article ) {
			# check() consumes the conditions it is handed (by reference), so
			# every article starts from a fresh copy of the targets.
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				$matching[] = $article;
			}
		}
		return $matching;
	}

	/**
	 * Get the parents. Only really useful if run() has been called already
	 * @return array Array of [ cl_from ID => [ cl_to name => categorylinks row ] ]
	 */
	public function getParents() {
		return $this->parents;
	}

	/**
	 * This functions recurses through the parent representation, trying to match the conditions
	 * @param int $id The article/category to check
	 * @param array &$conds The categories still to be matched, keyed by DBKEY
	 *   name. Matched entries are removed; in 'OR' mode the array is emptied on
	 *   the first match.
	 * @param array $path IDs already visited on the way down; used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	private function check( $id, &$conds, $path = [] ) {
		// Check for loops and stop!
		// The comparison is deliberately loose: IDs come both from the caller
		// of seed() and from database rows, presumably as a mix of ints and
		// numeric strings.
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) === 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $parent ) {
			$pname = $parent->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode === 'OR' ) {
					# One found, that's enough!
					$conds = [];
					return true;
				}

				# Assuming "AND" as default
				unset( $conds[$pname] );
				if ( count( $conds ) === 0 ) {
					# All conditions met, done
					return true;
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) === 0 ) {
				# Subparents have done it!
				return true;
			}
		}
		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records the category links of every ID in $this->next in $this->parents,
	 * then replaces $this->next with the page IDs of the newly seen categories
	 * that have a category page. Newly seen categories without a page are
	 * remembered in $this->deadend.
	 */
	private function scanNextLayer() {
		# Find all parents of the article currently in $this->next
		$layer = [];
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ [ 'cl_to', 'cl_from' ],
			/* WHERE  */ [ 'cl_from' => $this->next ],
			__METHOD__ . '-1'
		);
		foreach ( $res as $row ) {
			$name = $row->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$row->cl_from] ) ) {
				$this->parents[$row->cl_from] = [];
			}
			$this->parents[$row->cl_from][$name] = $row;

			# Ignore those we already have. Both lookups go by array key: a
			# loose in_array() over the dead ends would treat distinct numeric
			# looking names such as "1000" and "1e3" as the same category.
			if ( isset( $this->deadend[$name] ) || isset( $this->name2id[$name] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$name] = $name;
		}

		$this->next = [];

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ [ 'page_id', 'page_title' ],
				/* WHERE  */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
				__METHOD__ . '-2'
			);
			foreach ( $res as $row ) {
				$this->name2id[$row->page_title] = $row->page_id;
				$this->next[] = $row->page_id;
				unset( $layer[$row->page_title] );
			}
		}

		# Mark dead ends: whatever is left in $layer has no category page
		foreach ( $layer as $name ) {
			$this->deadend[$name] = $name;
		}
	}
}