* Document a bit
[lhc/web/wiklou.git] / includes / Categoryfinder.php
1 <?php
2
3 /**
4 * The "Categoryfinder" class takes a list of articles, creates an internal
5 * representation of all their parent categories (as well as parents of
6 * parents etc.). From this representation, it determines which of these
7 * articles are in one or all of a given subset of categories.
8 *
9 * Example use :
10 * <code>
11 * # Determines whether the article with the page_id 12345 is in both
12 * # "Category 1" and "Category 2" or their subcategories, respectively
13 *
14 * $cf = new Categoryfinder ;
15 * $cf->seed (
16 * array ( 12345 ) ,
17 * array ( "Category 1","Category 2" ) ,
18 * "AND"
19 * ) ;
20 * $a = $cf->run() ;
21 * print implode ( "," , $a ) ;
22 * </code>
23 *
24 */
class Categoryfinder {

	/** Article (page) IDs originally passed to seed(); run() filters this list */
	public $articles = array();

	/**
	 * DB-key names of categories that are linked to but have no category page.
	 * Keyed by the category name itself so membership can be tested with isset().
	 */
	public $deadend = array();

	/** Parent map: [ page ID => [ parent category DB key => categorylinks row ] ] */
	public $parents = array();

	/** Page IDs (articles or categories) whose parents still have to be looked up */
	public $next = array();

	/** Target categories to match, as [ DB key => DB key ] */
	public $targets = array();

	/** Lookup table [ category DB key => page ID ] for category pages that exist */
	public $name2id = array();

	/** Matching mode: "OR" matches any target; everything else is treated as "AND" */
	public $mode;

	/** Database connection used for reading; set by run() */
	public $dbr;

	/**
	 * Constructor (currently empty).
	 */
	public function __construct() {
	}

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param $article_ids Array of article (page) IDs to test
	 * @param $categories Array of category names; each one is converted to
	 *   DB-key form via Title::makeTitleSafe(), and names for which no title
	 *   can be built are silently skipped
	 * @param $mode String: "AND" (an article must be in all target categories)
	 *   or "OR" (any one target category is enough); default 'AND'.
	 */
	public function seed( $article_ids, $categories, $mode = "AND" ) {
		$this->articles = $article_ids;
		$this->next = $article_ids;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = array();
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions.
	 *
	 * @return array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_SLAVE );

		# Walk upwards one layer at a time until no unscanned pages remain
		while ( count( $this->next ) > 0 ) {
			$this->scan_next_layer();
		}

		# Now check if this applies to the individual articles
		$ret = array();
		foreach ( $this->articles as $article ) {
			# check() consumes its condition list, so hand it a fresh copy
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}
		return $ret;
	}

	/**
	 * Recurses through the parent representation, trying to match the conditions.
	 *
	 * @param $id The article/category page ID to check
	 * @param $conds The array of categories still to be matched, keyed by
	 *   DB key. Passed by reference and modified: in "AND" mode every matched
	 *   category is removed, in "OR" mode the array is emptied on the first match
	 * @param $path Page IDs already visited on the way here; used to check
	 *   for recursion loops
	 * @return bool Does this match the conditions?
	 */
	public function check( $id, &$conds, $path = array() ) {
		// Check for loops and stop!
		// (Loose comparison on purpose: IDs may be ints or numeric strings.)
		if ( in_array( $id, $path ) ) {
			return false;
		}
		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions = all matched
		if ( count( $conds ) == 0 ) {
			return true;
		}

		# No known parents: nothing left that could satisfy the conditions
		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# Iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == "OR" ) {
					# One found, that's enough!
					$conds = array();
					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) == 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) == 0 ) {
				# Subparents have done it!
				return true;
			}
		}
		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next.
	 *
	 * Records the category links of every page in $this->next in
	 * $this->parents, resolves newly seen categories to page IDs
	 * ($this->name2id), queues those IDs as the new $this->next and remembers
	 * categories without a page in $this->deadend.
	 */
	public function scan_next_layer() {
		# Nothing queued: avoid issuing a query with an empty ID list
		if ( count( $this->next ) == 0 ) {
			return;
		}

		# Find all parents of the pages currently in $this->next
		$layer = array();
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ '*',
			/* WHERE  */ array( 'cl_from' => $this->next ),
			__METHOD__ . "-1"
		);
		while ( $o = $this->dbr->fetchObject( $res ) ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = array();
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have.
			# $this->deadend is keyed by category name, so test the key: a
			# loose in_array() would treat numeric-looking names such as
			# "1e3" and "1000" as the same category.
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}
			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}
		$this->dbr->freeResult( $res );

		$this->next = array();

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ 'page_id,page_title',
				/* WHERE  */ array( 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ),
				__METHOD__ . "-2"
			);
			while ( $o = $this->dbr->fetchObject( $res ) ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				# Whatever remains in $layer afterwards has no category page
				unset( $layer[$name] );
			}
			$this->dbr->freeResult( $res );
		}

		# Mark dead ends
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}

} # END OF CLASS "Categoryfinder"