Follow up r62436 comments.
[lhc/web/wiklou.git] / includes / Categoryfinder.php
1 <?php
2 /**
3 * The "Categoryfinder" class takes a list of articles, creates an internal
4 * representation of all their parent categories (as well as parents of
5 * parents etc.). From this representation, it determines which of these
6 * articles are in one or all of a given subset of categories.
7 *
8 * Example use :
9 * <code>
10 * # Determines whether the article with the page_id 12345 is in both
11 * # "Category 1" and "Category 2" or their subcategories, respectively
12 *
13 * $cf = new Categoryfinder ;
14 * $cf->seed (
15 * array ( 12345 ) ,
16 * array ( "Category 1","Category 2" ) ,
17 * "AND"
18 * ) ;
19 * $a = $cf->run() ;
20 * print implode ( "," , $a ) ;
21 * </code>
22 *
23 */
class Categoryfinder {

	/** Array: the original article IDs passed to seed() */
	public $articles = array();

	/** Array [DBKEY => DBKEY] of category names for which no category page was found */
	public $deadend = array();

	/** Array [page ID => array( category DBKEY => categorylinks row )] */
	public $parents = array();

	/** Array of article/category page IDs whose parents still have to be scanned */
	public $next = array();

	/** Array [DBKEY => DBKEY] of the target category names */
	public $targets = array();

	/** Array [category DBKEY => page ID] for category pages found so far */
	public $name2id = array();

	/** String: "AND" or "OR" */
	public $mode;

	/** Read-DB slave connection, set by run() */
	public $dbr;

	/**
	 * Constructor (currently empty).
	 */
	public function __construct() {
	}

	/**
	 * Initializes the instance. Do this prior to calling run().
	 * @param $article_ids Array of article IDs
	 * @param $categories Array of category names; each one is converted to
	 *   DBKEY form through Title::makeTitleSafe(), names for which no title
	 *   can be made are silently dropped
	 * @param $mode String: "OR" to match articles that are in at least one
	 *   of the categories; any other value is handled as "AND" (the article
	 *   has to be in all of them). Default 'AND'.
	 */
	public function seed( $article_ids, $categories, $mode = "AND" ) {
		$this->articles = $article_ids;
		$this->next = $article_ids;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = array();
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 * @return array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_SLAVE );

		# Build the parent representation, one layer per iteration
		while ( count( $this->next ) > 0 ) {
			$this->scan_next_layer();
		}

		# Now check if this applies to the individual articles
		$ret = array();
		foreach ( $this->articles as $article ) {
			# check() consumes the conditions, so hand it a fresh copy per article
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}
		return $ret;
	}

	/**
	 * This function recurses through the parent representation, trying to match the conditions
	 * @param $id The article/category to check
	 * @param $conds The array of categories to match, passed by reference:
	 *   matched categories are removed from it ("AND" mode) or it is emptied
	 *   altogether ("OR" mode)
	 * @param $path used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	public function check( $id, &$conds, $path = array() ) {
		// Check for loops and stop!
		// The comparison is left loose on purpose: IDs may be given as ints by
		// the caller while the DB layer presumably returns them as strings.
		if ( in_array( $id, $path ) ) {
			return false;
		}
		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) == 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == "OR" ) {
					# One found, that's enough!
					$conds = array();
					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) == 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) == 0 ) {
				# Subparents have done it!
				return true;
			}
		}
		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next.
	 * Afterwards $this->next holds the page IDs of the newly found category
	 * pages, i.e. the layer to scan on the following call.
	 */
	public function scan_next_layer() {
		# Nothing left to scan
		if ( count( $this->next ) == 0 ) {
			return;
		}

		# Find all parents of the article currently in $this->next
		$layer = array();
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ '*',
			/* WHERE  */ array( 'cl_from' => $this->next ),
			__METHOD__ . "-1"
		);
		while ( $o = $this->dbr->fetchObject( $res ) ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = array();
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have.
			# $this->deadend is keyed by category name, so look the name up
			# by key: in_array() compares loosely and treated different
			# numeric-looking names (e.g. "1e3" and "1000") as the same one.
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}
			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = array();

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ array( 'page_id', 'page_title' ),
				/* WHERE  */ array( 'page_namespace' => NS_CATEGORY , 'page_title' => $layer ),
				__METHOD__ . "-2"
			);
			while ( $o = $this->dbr->fetchObject( $res ) ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				# Found, so it is not a dead end
				unset( $layer[$name] );
			}
		}

		# Mark dead ends: whatever is left in $layer has no category page
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}

} # END OF CLASS "Categoryfinder"