widget: Fix changes of copyright year
[lhc/web/wiklou.git] / includes / CategoryFinder.php
1 <?php
2 /**
3 * Recent changes filtering by category.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Wikimedia\Rdbms\IDatabase;
24
25 /**
26 * The "CategoryFinder" class takes a list of articles, creates an internal
27 * representation of all their parent categories (as well as parents of
28 * parents etc.). From this representation, it determines which of these
29 * articles are in one or all of a given subset of categories.
30 *
31 * Example use :
32 * @code
33 * # Determines whether the article with the page_id 12345 is in both
34 * # "Category 1" and "Category 2" or their subcategories, respectively
35 *
36 * $cf = new CategoryFinder;
37 * $cf->seed(
38 * [ 12345 ],
39 * [ 'Category 1', 'Category 2' ],
40 * 'AND'
41 * );
42 * $a = $cf->run();
43 * print implode( ',' , $a );
44 * @endcode
45 */
class CategoryFinder {
	/** @var int[] The original article IDs passed to the seed function */
	protected $articles = [];

	/**
	 * @var string[] Map of DBKEY => DBKEY for category names that were looked up
	 *  in the page table and turned out to have no category page
	 */
	protected $deadend = [];

	/** @var array Map of [ page ID => [ category DBKEY => categorylinks row ] ] */
	protected $parents = [];

	/** @var array Article/category page IDs whose parents are scanned by the next layer scan */
	protected $next = [];

	/** @var int Max layer depth (-1 means unlimited) */
	protected $maxdepth = -1;

	/** @var string[] Map of DBKEY => DBKEY for the target categories */
	protected $targets = [];

	/** @var array Map of category DBKEY => page ID of the category page */
	protected $name2id = [];

	/** @var string "AND" or "OR" */
	protected $mode;

	/** @var IDatabase Read-DB replica DB */
	protected $dbr;

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * Only the article list, the target list, the mode and the depth limit are
	 * (re)set here; the parent tree collected by earlier run() calls is kept.
	 *
	 * @param array $articleIds Array of article IDs
	 * @param array $categories Array of category names (without namespace prefix).
	 *  Each name is normalised to DBKEY form; names that do not yield a valid
	 *  title are silently dropped.
	 * @param string $mode 'OR' to match articles that are in at least one of the
	 *  target categories; any other value (default 'AND') requires all of them.
	 * @param int $maxdepth Maximum layer depth. Where:
	 *   -1 means deep recursion (default);
	 *    0 means no-parents;
	 *    1 means one parent layer, etc.
	 */
	public function seed( $articleIds, $categories, $mode = 'AND', $maxdepth = -1 ) {
		$this->articles = $articleIds;
		$this->next = $articleIds;
		$this->mode = $mode;
		$this->maxdepth = $maxdepth;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = [];
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				# Keyed by name so that check() can test membership with isset()
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 * @return array Array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_REPLICA );

		$i = 0;
		$dig = true;
		while ( count( $this->next ) && $dig ) {
			$this->scanNextLayer();

			// Is there any depth limit?
			// The first scan (the articles' own categories) always happens;
			// a limit of N allows N further scans after it.
			if ( $this->maxdepth !== -1 ) {
				$dig = $i < $this->maxdepth;
				$i++;
			}
		}

		# Now check if this applies to the individual articles
		$ret = [];

		foreach ( $this->articles as $article ) {
			# check() consumes the condition list by reference, so every
			# article gets its own fresh copy of the targets
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}
		return $ret;
	}

	/**
	 * Get the parents. Only really useful if run() has been called already
	 * @return array Map of [ page ID => [ category DBKEY => categorylinks row ] ]
	 */
	public function getParents() {
		return $this->parents;
	}

	/**
	 * This functions recurses through the parent representation, trying to match the conditions
	 * @param int $id The article/category to check
	 * @param array &$conds The array of categories to match (DBKEY => DBKEY).
	 *  Satisfied conditions are removed from it, so the caller's array is modified.
	 * @param array $path Used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	private function check( $id, &$conds, $path = [] ) {
		// Check for loops and stop!
		// The comparison is deliberately loose: IDs may be ints (from the
		// caller) or numeric strings (from database rows).
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) == 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == 'OR' ) {
					# One found, that's enough!
					$conds = [];
					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) == 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) == 0 ) {
				# Subparents have done it!
				return true;
			}
		}
		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records every category link of the pages in $this->next in $this->parents,
	 * then replaces $this->next with the page IDs of the newly seen categories
	 * that have a category page. Newly seen categories without a page are
	 * remembered as dead ends.
	 */
	private function scanNextLayer() {
		# Find all parents of the article currently in $this->next
		$layer = [];
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ [ 'cl_to', 'cl_from' ],
			/* WHERE  */ [ 'cl_from' => $this->next ],
			__METHOD__ . '-1'
		);
		foreach ( $res as $o ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = [];
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have.
			# $this->deadend is keyed by name, so test the key: a loose
			# in_array() would treat numeric-looking names such as "007"
			# and "7" as the same category.
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}

			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = [];

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ [ 'page_id', 'page_title' ],
				/* WHERE  */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
				__METHOD__ . '-2'
			);
			foreach ( $res as $o ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				unset( $layer[$name] );
			}
		}

		# Mark dead ends: whatever is left in $layer has no category page
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}
}