3 * Recent changes filtering by category.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 use Wikimedia\Rdbms\IDatabase
;
/**
 * The "CategoryFinder" class takes a list of articles, creates an internal
 * representation of all their parent categories (as well as parents of
 * parents etc.). From this representation, it determines which of these
 * articles are in one or all of a given subset of categories.
 *
 * Example use :
 * @code
 *     # Determines whether the article with the page_id 12345 is in both
 *     # "Category 1" and "Category 2" or their subcategories, respectively
 *
 *     $cf = new CategoryFinder;
 *     $cf->seed(
 *         [ 12345 ],
 *         [ 'Category 1', 'Category 2' ],
 *         'AND'
 *     );
 *     $a = $cf->run();
 *     print implode( ',' , $a );
 * @endcode
 */
class CategoryFinder {
	/** @var int[] The original article IDs passed to the seed function */
	protected $articles = [];

	/** @var array Array of DBKEY category names for categories that don't have a page */
	protected $deadend = [];

	/** @var array Array of [ page ID => [ category DBKEY => categorylinks row ] ] */
	protected $parents = [];

	/** @var array Array of article/category IDs still to be scanned for parents */
	protected $next = [];

	/** @var array Array of DBKEY category names, keyed by that same name */
	protected $targets = [];

	/** @var array Array of [ DBKEY => ID ] */
	protected $name2id = [];

	/** @var string "AND" or "OR" */
	protected $mode;

	/** @var IDatabase Read-DB replica DB */
	protected $dbr;

	/**
	 * Initializes the instance. Do this prior to calling run().
	 *
	 * @param array $articleIds Array of article IDs
	 * @param array $categories Category names to test the articles against. Each
	 *   name is normalised to DBKEY form; names that do not yield a valid title
	 *   are dropped.
	 * @param string $mode 'OR' to match articles that are in at least one of the
	 *   categories; any other value (default 'AND') requires all of them.
	 */
	public function seed( $articleIds, $categories, $mode = 'AND' ) {
		$this->articles = $articleIds;
		$this->next = $articleIds;
		$this->mode = $mode;

		# Set the list of target categories; convert them to DBKEY form first
		$this->targets = [];
		foreach ( $categories as $c ) {
			$ct = Title::makeTitleSafe( NS_CATEGORY, $c );
			if ( $ct ) {
				$c = $ct->getDBkey();
				$this->targets[$c] = $c;
			}
		}
	}

	/**
	 * Iterates through the parent tree starting with the seed values,
	 * then checks the articles if they match the conditions
	 *
	 * @return array Array of page_ids (those given to seed() that match the conditions)
	 */
	public function run() {
		$this->dbr = wfGetDB( DB_REPLICA );

		# Climb the category graph one layer at a time until nothing new turns up
		while ( count( $this->next ) > 0 ) {
			$this->scanNextLayer();
		}

		# Now check if this applies to the individual articles
		$ret = [];

		foreach ( $this->articles as $article ) {
			# check() consumes its condition list by reference, so every
			# article starts from a fresh copy of the targets
			$conds = $this->targets;
			if ( $this->check( $article, $conds ) ) {
				# Matches the conditions
				$ret[] = $article;
			}
		}

		return $ret;
	}

	/**
	 * Get the parents. Only really useful if run() has been called already
	 *
	 * @return array Array of [ page ID => [ category DBKEY => categorylinks row ] ]
	 */
	public function getParents() {
		return $this->parents;
	}

	/**
	 * This function recurses through the parent representation, trying to match the conditions
	 *
	 * @param int $id The article/category to check
	 * @param array &$conds The array of categories to match; satisfied entries
	 *   are removed from it as the recursion proceeds
	 * @param array $path Used to check for recursion loops
	 * @return bool Does this match the conditions?
	 */
	private function check( $id, &$conds, $path = [] ) {
		// Check for loops and stop!
		// The comparison is deliberately loose: IDs may arrive as ints or as
		// numeric strings (NOTE(review): depends on caller and DB driver).
		if ( in_array( $id, $path ) ) {
			return false;
		}

		$path[] = $id;

		# Shortcut (runtime paranoia): No conditions=all matched
		if ( count( $conds ) === 0 ) {
			return true;
		}

		if ( !isset( $this->parents[$id] ) ) {
			return false;
		}

		# iterate through the parents
		foreach ( $this->parents[$id] as $p ) {
			# NOTE(review): restored statement; rows are stored by
			# scanNextLayer() and are expected to carry cl_to
			$pname = $p->cl_to;

			# Is this a condition?
			if ( isset( $conds[$pname] ) ) {
				# This key is in the category list!
				if ( $this->mode == 'OR' ) {
					# One found, that's enough!
					$conds = [];
					return true;
				} else {
					# Assuming "AND" as default
					unset( $conds[$pname] );
					if ( count( $conds ) === 0 ) {
						# All conditions met, done
						return true;
					}
				}
			}

			# Not done yet, try sub-parents
			if ( !isset( $this->name2id[$pname] ) ) {
				# No sub-parent
				continue;
			}
			$done = $this->check( $this->name2id[$pname], $conds, $path );
			if ( $done || count( $conds ) === 0 ) {
				# Subparents have done it!
				return true;
			}
		}

		return false;
	}

	/**
	 * Scans a "parent layer" of the articles/categories in $this->next
	 *
	 * Records the category links of every page in $this->next, then replaces
	 * $this->next with the page IDs of the newly discovered categories.
	 * Categories without a page are remembered as dead ends.
	 */
	private function scanNextLayer() {
		# Find all parents of the article currently in $this->next
		$layer = [];
		$res = $this->dbr->select(
			/* FROM   */ 'categorylinks',
			# NOTE(review): the SELECT argument was lost from the listing;
			# whole rows are stored below, so '*' is assumed
			/* SELECT */ '*',
			/* WHERE  */ [ 'cl_from' => $this->next ],
			__METHOD__ . '-1'
		);
		foreach ( $res as $o ) {
			$k = $o->cl_to;

			# Update parent tree
			if ( !isset( $this->parents[$o->cl_from] ) ) {
				$this->parents[$o->cl_from] = [];
			}
			$this->parents[$o->cl_from][$k] = $o;

			# Ignore those we already have. Both maps are keyed by category
			# name, so an exact key lookup is used rather than a loose
			# in_array() scan (which treats e.g. '1e3' and '1000' as equal).
			if ( isset( $this->deadend[$k] ) ) {
				continue;
			}

			if ( isset( $this->name2id[$k] ) ) {
				continue;
			}

			# Hey, new category!
			$layer[$k] = $k;
		}

		$this->next = [];

		# Find the IDs of all category pages in $layer, if they exist
		if ( count( $layer ) > 0 ) {
			$res = $this->dbr->select(
				/* FROM   */ 'page',
				/* SELECT */ [ 'page_id', 'page_title' ],
				/* WHERE  */ [ 'page_namespace' => NS_CATEGORY, 'page_title' => $layer ],
				__METHOD__ . '-2'
			);
			foreach ( $res as $o ) {
				$id = $o->page_id;
				$name = $o->page_title;
				$this->name2id[$name] = $id;
				$this->next[] = $id;
				unset( $layer[$name] );
			}
		}

		# Mark dead ends: whatever is left in $layer has no category page
		foreach ( $layer as $v ) {
			$this->deadend[$v] = $v;
		}
	}
}