Merge "rcfeed: Add basic PHPUnit integration test"
[mediawiki.git] / includes / CategoryFinder.php
blob504b35f885d9c6a674a6a56fdefa4bc42963a87f
<?php
/**
 * Recent changes filtering by category.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

/**
 * The "CategoryFinder" class takes a list of articles, creates an internal
 * representation of all their parent categories (as well as parents of
 * parents etc.). From this representation, it determines which of these
 * articles are in one or all of a given subset of categories.
 *
 * Example use :
 * @code
 *     # Determines whether the article with the page_id 12345 is in both
 *     # "Category 1" and "Category 2" or their subcategories, respectively
 *
 *     $cf = new CategoryFinder;
 *     $cf->seed(
 *         [ 12345 ],
 *         [ 'Category 1', 'Category 2' ],
 *         'AND'
 *     );
 *     $a = $cf->run();
 *     print implode( ',' , $a );
 * @endcode
 */
class CategoryFinder {
	/** @var int[] The original article IDs passed to the seed function */
	protected $articles = [];

	/**
	 * @var array Category names (DBKEY form) for categories that don't have a
	 *  page, stored as [ name => name ] so membership can be tested by key
	 */
	protected $deadend = [];

	/** @var array Array of [ ID => [ parent category DBKEY => categorylinks row ] ] */
	protected $parents = [];

	/** @var array Array of article/category IDs still to be scanned */
	protected $next = [];

	/** @var array Target category names in DBKEY form, stored as [ name => name ] */
	protected $targets = [];

	/** @var array Map of category DBKEY => page ID of that category's page */
	protected $name2id = [];

	/** @var string "AND" or "OR" */
	protected $mode;

	/** @var IDatabase Read-DB replica DB */
	protected $dbr;

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param array $articleIds Array of article IDs
	 * @param array $categories Array of category names (without namespace
	 *  prefix). Each name is normalised to DBKEY form through
	 *  Title::makeTitleSafe(); names that do not form a valid title are
	 *  silently dropped.
	 * @param string $mode Either 'OR' (an article matches as soon as it is in
	 *  any one of the target categories) or 'AND' (an article must be in all
	 *  of them). Any value other than 'OR' is treated as 'AND'. Default 'AND'.
	 */
	public function seed( $articleIds, $categories, $mode = 'AND' ) {
		$this->articles = $articleIds;
		$this->next = $articleIds;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = [];
		foreach ( $categories as $category ) {
			$title = Title::makeTitleSafe( NS_CATEGORY, $category );
			if ( $title ) {
				$dbKey = $title->getDBkey();
				$this->targets[$dbKey] = $dbKey;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 *
	 * @return array Array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_REPLICA );

		# Walk upwards one layer at a time until no unseen category is left
		while ( count( $this->next ) > 0 ) {
			$this->scanNextLayer();
		}

		# Now check if this applies to the individual articles
		$ret = [];
		foreach ( $this->articles as $article ) {
			# check() consumes its condition list by reference, so every
			# article gets a fresh copy of the targets
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}

		return $ret;
	}

	/**
	 * Get the parents. Only really useful if run() has been called already
	 *
	 * @return array Array of [ ID => [ parent category DBKEY => categorylinks row ] ]
	 */
	public function getParents() {
		return $this->parents;
	}

	/**
	 * This function recurses through the parent representation, trying to match the conditions
	 *
	 * @param int $id The article/category to check
	 * @param array &$conds The array of categories to match, keyed by DBKEY.
	 *  Matched entries are removed from it as the recursion proceeds.
	 * @param array $path Used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	private function check( $id, &$conds, $path = [] ) {
		// Check for loops and stop!
		// NOTE: the comparison is deliberately loose; IDs may presumably arrive
		// as ints (from seed()) or numeric strings (from the DB layer).
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) === 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode === 'OR' ) {
					# One found, that's enough!
					$conds = [];
					return true;
				}

				# Assuming "AND" as default
				unset( $conds[$pname] );
				if ( count( $conds ) === 0 ) {
					# All conditions met, done
					return true;
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}

			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) === 0 ) {
				# Subparents have done it!
				return true;
			}
		}

		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records every category link of the IDs in $this->next in $this->parents,
	 * then replaces $this->next with the page IDs of the newly discovered
	 * categories. Newly discovered categories that have no page are remembered
	 * in $this->deadend.
	 */
	private function scanNextLayer() {
		# Find all parents of the articles/categories currently in $this->next
		$layer = [];
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			/* SELECT */ '*',
			/* WHERE  */ [ 'cl_from' => $this->next ],
			__METHOD__ . '-1'
		);
		foreach ( $res as $row ) {
			$k = $row->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$row->cl_from] ) ) {
				$this->parents[$row->cl_from] = [];
			}
			$this->parents[$row->cl_from][$k] = $row;

			# Ignore those we already have. Both maps are keyed by category
			# name, so test the key: a loose in_array() over the values would
			# treat numeric-looking names such as "1e3" and "1000" as equal.
			if ( isset( $this->deadend[$k] ) || isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = [];

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ [ 'page_id', 'page_title' ],
				/* WHERE  */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
				__METHOD__ . '-2'
			);
			foreach ( $res as $row ) {
				$id = $row->page_id;
				$name = $row->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				unset( $layer[$name] );
			}
		}

		# Mark dead ends: whatever is still in $layer has no category page
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}
}