Limit test leakage, $wgCapitalLinks expected to be true
[mediawiki.git] / includes / Categoryfinder.php
blobb9cbc9a3f5fef2dbdbdad5883537ae7016936122
1 <?php
/**
 * Recent changes filtering by category.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
/**
 * The "Categoryfinder" class takes a list of articles, creates an internal
 * representation of all their parent categories (as well as parents of
 * parents etc.). From this representation, it determines which of these
 * articles are in one or all of a given subset of categories.
 *
 * Example use :
 * <code>
 *     # Determines whether the article with the page_id 12345 is in both
 *     # "Category 1" and "Category 2" or their subcategories, respectively
 *
 *     $cf = new Categoryfinder;
 *     $cf->seed(
 *         array( 12345 ),
 *         array( 'Category 1', 'Category 2' ),
 *         'AND'
 *     );
 *     $a = $cf->run();
 *     print implode( ',' , $a );
 * </code>
 */
class Categoryfinder {
	/** @var int[] The original article IDs passed to the seed function */
	protected $articles = array();

	/**
	 * @var array DBKEY names of categories that don't have a page,
	 *  keyed by that same name so membership can be tested with isset()
	 */
	protected $deadend = array();

	/**
	 * @var array Parent map: page ID => array( category DBKEY => categorylinks row ).
	 *  Filled by scan_next_layer(), read by check().
	 */
	protected $parents = array();

	/** @var array Article/category page IDs whose parents still have to be fetched */
	protected $next = array();

	/** @var array Target category names in DBKEY form, keyed by that same name */
	protected $targets = array();

	/** @var array Map of category DBKEY => page ID of the category page */
	protected $name2id = array();

	/** @var string "AND" or "OR"; any value other than "OR" is handled as "AND" */
	protected $mode;

	/** @var DatabaseBase Read-DB slave */
	protected $dbr;

	public function __construct() {
	}

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param array $article_ids Array of article IDs
	 * @param array $categories Category names (without namespace prefix) to
	 *  test against. Each one is normalised to DBKEY form through
	 *  Title::makeTitleSafe(); names that do not form a valid title are dropped.
	 * @param string $mode 'AND' if an article has to be in all of the
	 *  categories, 'OR' if any single one of them is enough. Default 'AND'.
	 */
	public function seed( $article_ids, $categories, $mode = 'AND' ) {
		$this->articles = $article_ids;
		$this->next = $article_ids;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = array();
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 *
	 * @return array Array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_SLAVE );

		# Each pass resolves one more level of parents; scan_next_layer()
		# refills $this->next with the category pages it discovered.
		while ( count( $this->next ) > 0 ) {
			$this->scan_next_layer();
		}

		# Now check if this applies to the individual articles
		$ret = array();

		foreach ( $this->articles as $article ) {
			# check() consumes its condition list, so hand it a fresh copy
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}

		return $ret;
	}

	/**
	 * This function recurses through the parent representation, trying to match the conditions
	 *
	 * @param int $id The article/category to check
	 * @param array $conds The array of categories to match. Passed by
	 *  reference: matched categories are removed from it, and it is emptied
	 *  completely once the conditions are satisfied.
	 * @param array $path Used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	public function check( $id, &$conds, $path = array() ) {
		// Check for loops and stop!
		// Loose comparison is deliberate: IDs may arrive as integers from the
		// caller and as strings from the database.
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) == 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == 'OR' ) {
					# One found, that's enough!
					$conds = array();

					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) == 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) == 0 ) {
				# Subparents have done it!
				return true;
			}
		}

		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records the categorylinks rows of every page in $this->next in
	 * $this->parents, then replaces $this->next with the page IDs of the
	 * newly seen categories that have a category page. Newly seen categories
	 * without a page are remembered in $this->deadend.
	 */
	public function scan_next_layer() {
		# Nothing queued: do not build a query with an empty cl_from list
		if ( count( $this->next ) == 0 ) {
			return;
		}

		wfProfileIn( __METHOD__ );

		# Find all parents of the article currently in $this->next
		$layer = array();
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ '*',
			/* WHERE  */ array( 'cl_from' => $this->next ),
			__METHOD__ . '-1'
		);
		foreach ( $res as $o ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = array();
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have.
			# $this->deadend is keyed by name, so test the key: in_array()
			# compares loosely and would treat numeric-looking names such as
			# "1e3" and "1000" as the same category.
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}

			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = array();

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ array( 'page_id', 'page_title' ),
				/* WHERE  */ array( 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ),
				__METHOD__ . '-2'
			);
			foreach ( $res as $o ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				unset( $layer[$name] );
			}
		}

		# Mark dead ends: whatever is left in $layer has no category page
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}

		wfProfileOut( __METHOD__ );
	}
}