(bug 37755) Set robot meta tags for 'view source' pages
[lhc/web/wiklou.git] / includes / Categoryfinder.php
1 <?php
2 /**
3 * Recent changes filtering by category.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * The "Categoryfinder" class takes a list of articles, creates an internal
25 * representation of all their parent categories (as well as parents of
26 * parents etc.). From this representation, it determines which of these
27 * articles are in one or all of a given subset of categories.
28 *
29 * Example use :
30 * <code>
31 * # Determines whether the article with the page_id 12345 is in both
32 * # "Category 1" and "Category 2" or their subcategories, respectively
33 *
34 * $cf = new Categoryfinder;
35 * $cf->seed(
36 * array( 12345 ),
37 * array( 'Category 1', 'Category 2' ),
38 * 'AND'
39 * );
40 * $a = $cf->run();
41 * print implode( ',' , $a );
42 * </code>
43 *
44 */
class Categoryfinder {
	/** @var array The original article IDs passed to seed() */
	public $articles = array();

	/** @var array DBKEY names of categories that have no category page, keyed by that same name */
	public $deadend = array();

	/** @var array Map of page ID => array( parent category DBKEY => categorylinks row object ) */
	public $parents = array();

	/** @var array Article/category page IDs whose parents still have to be scanned */
	public $next = array();

	/** @var array Target category names in DBKEY form, keyed by that same name */
	public $targets = array();

	/** @var array Map of category DBKEY name => page ID of the category page */
	public $name2id = array();

	/** @var string "AND" or "OR" */
	public $mode;

	/**
	 * @var DatabaseBase Read-DB slave; assigned by run()
	 */
	public $dbr;

	/**
	 * Constructor (currently empty).
	 */
	public function __construct() {
	}

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param $article_ids Array of article IDs to test
	 * @param $categories Array of category names; each one is converted to
	 *   DBKEY form via Title::makeTitleSafe(), names that do not yield a
	 *   title are silently dropped
	 * @param $mode String: 'OR' to require membership in at least one target
	 *   category; any other value is handled as 'AND' (membership in all
	 *   target categories). Default 'AND'.
	 */
	public function seed( $article_ids, $categories, $mode = 'AND' ) {
		$this->articles = $article_ids;
		$this->next = $article_ids;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = array();
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 *
	 * @return array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_SLAVE );

		# Each pass resolves one more level of ancestors; scan_next_layer()
		# refills $this->next only with category pages not seen before
		while ( count( $this->next ) > 0 ) {
			$this->scan_next_layer();
		}

		# Now check if this applies to the individual articles
		$ret = array();

		foreach ( $this->articles as $article ) {
			# check() consumes the condition list by reference, so hand
			# every article its own copy of the targets
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}
		return $ret;
	}

	/**
	 * Recurses through the parent representation, trying to match the conditions
	 *
	 * @param $id int The article/category to check
	 * @param $conds array The categories still to be matched, keyed by DBKEY
	 *   name. Passed by reference: matched entries are removed (AND mode) or
	 *   the whole array is emptied (OR mode).
	 * @param $path array IDs already visited on this branch, used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	public function check( $id, &$conds, $path = array() ) {
		// Check for loops and stop!
		// The comparison is intentionally loose: IDs may arrive as integers
		// from the caller or as numeric strings from database rows.
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) == 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == 'OR' ) {
					# One found, that's enough!
					$conds = array();
					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) == 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) == 0 ) {
				# Subparents have done it!
				return true;
			}
		}
		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records the parent categories of every ID in $this->next in
	 * $this->parents, then replaces $this->next with the page IDs of the
	 * newly discovered categories that have a category page. New categories
	 * without a page are remembered in $this->deadend.
	 */
	public function scan_next_layer() {
		# Nothing to look up; do not issue a query with an empty cl_from list
		if ( count( $this->next ) == 0 ) {
			return;
		}

		# Find all parents of the article currently in $this->next
		$layer = array();
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ '*',
			/* WHERE  */ array( 'cl_from' => $this->next ),
			__METHOD__ . '-1'
		);
		foreach ( $res as $o ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = array();
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have.
			# $this->deadend is keyed by name, so use an exact key lookup:
			# a loose in_array() would treat distinct numeric-looking
			# names such as "1e3" and "1000" as the same category.
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}

			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = array();

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ array( 'page_id', 'page_title' ),
				/* WHERE  */ array( 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ),
				__METHOD__ . '-2'
			);
			foreach ( $res as $o ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				# Whatever is left in $layer afterwards has no category page
				unset( $layer[$name] );
			}
		}

		# Mark dead ends
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}

}