Fix namespace handling for uncategorized-categories-exceptionlist
[mediawiki.git] / tests / phpunit / includes / LinkFilterTest.php
blob428b0129a0d9e3bbb877cc5bbcdeceb685e366f8
1 <?php
/**
 * Tests for LinkFilter::makeLikeArray().
 *
 * Valid search patterns are converted to a LIKE array, translated into an
 * equivalent regex and matched against the indexes that wfMakeUrlIndexes()
 * produces for a URL. Invalid search patterns must be rejected.
 *
 * @group Database
 */
class LinkFilterTest extends MediaWikiLangTestCase {

	/**
	 * Sets a fixed $wgUrlProtocols list for every test in this class
	 * (presumably so that results do not depend on local configuration).
	 */
	protected function setUp() {
		parent::setUp();

		$this->setMwGlobals( 'wgUrlProtocols', [
			'http://',
			'https://',
			'ftp://',
			'irc://',
			'ircs://',
			'gopher://',
			'telnet://',
			'nntp://',
			'worldwind://',
			'mailto:',
			'news:',
			'svn://',
			'git://',
			'mms://',
			'//',
		] );
	}

	/**
	 * createRegexFromLike($like)
	 *
	 * Takes an array as created by LinkFilter::makeLikeArray() and creates a regex from it.
	 *
	 * LikeMatch items are translated as wildcards: '%' becomes '.*' and '_'
	 * becomes '.'; a LikeMatch with any other value contributes nothing.
	 * Every other item is treated as literal text and escaped with
	 * preg_quote(). The result is anchored at both ends and uses '!' as the
	 * delimiter.
	 *
	 * @param array $like Array as created by LinkFilter::makeLikeArray()
	 * @return string Regex
	 */
	public function createRegexFromLIKE( $like ) {
		$regex = '!^';

		foreach ( $like as $item ) {
			if ( $item instanceof LikeMatch ) {
				// Strict comparison: only the exact wildcard tokens are translated.
				$wildcard = $item->toString();
				if ( $wildcard === '%' ) {
					$regex .= '.*';
				} elseif ( $wildcard === '_' ) {
					$regex .= '.';
				}
			} else {
				// Literal text; also escape the '!' delimiter used by this regex.
				$regex .= preg_quote( $item, '!' );
			}
		}

		$regex .= '$!';

		return $regex;
	}

	/**
	 * provideValidPatterns()
	 *
	 * Each row is [ protocol, search pattern, URL, (optional) shouldBeFound ].
	 * Rows without the fourth element are expected to match; rows ending in
	 * false are valid patterns that must NOT match the given URL.
	 *
	 * @return array
	 */
	public static function provideValidPatterns() {
		return [
			// Protocol, Search pattern, URL which matches the pattern
			[ 'http://', '*.test.com', 'http://www.test.com' ],
			[ 'http://', 'test.com:8080/dir/file', 'http://name:pass@test.com:8080/dir/file' ],
			[ 'https://', '*.com', 'https://s.s.test..com:88/dir/file?a=1&b=2' ],
			[ 'https://', '*.com', 'https://name:pass@secure.com/index.html' ],
			[ 'http://', 'name:pass@test.com', 'http://test.com' ],
			[ 'http://', 'test.com', 'http://name:pass@test.com' ],
			[ 'http://', '*.test.com', 'http://a.b.c.test.com/dir/dir/file?a=6' ],
			[ null, 'http://*.test.com', 'http://www.test.com' ],
			[ 'mailto:', 'name@mail.test123.com', 'mailto:name@mail.test123.com' ],
			[ '',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			],
			[ '', 'http://name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ '', 'http://name:wrongpass@*.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ 'http://', 'name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ '', 'http://name:pass@www.test.com:12345',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ 'ftp://', 'user:pass@ftp.test.com:1233/home/user/file;type=efw',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://otheruser:otherpass@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://ftp.test.com/',
				'ftp://user:pass@ftp.test.com/home/user/file;type=efw' ],
			[ null, 'ftp://ftp.test.com/',
				'ftp://user:pass@ftp.test.com/home/user/file;type=efw' ],
			[ null, 'ftp://*.test.com:222/',
				'ftp://user:pass@ftp.test.com:222/home' ],
			[ 'irc://', '*.myserver:6667/', 'irc://test.myserver:6667/' ],
			[ 'irc://', 'name:pass@*.myserver/', 'irc://test.myserver:6667/' ],
			[ 'irc://', 'name:pass@*.myserver/', 'irc://other:@test.myserver:6667/' ],
			[ '', 'irc://test/name,string,abc?msg=t', 'irc://test/name,string,abc?msg=test' ],
			[ '', 'https://gerrit.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ],
			[ '', 'https://gerrit.wikimedia.org',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ],
			[ 'mailto:', '*.test.com', 'mailto:name@pop3.test.com' ],
			[ 'mailto:', 'test.com', 'mailto:name@test.com' ],
			[ 'news:', 'test.1234afc@news.test.com', 'news:test.1234afc@news.test.com' ],
			[ 'news:', '*.test.com', 'news:test.1234afc@news.test.com' ],
			[ '', 'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ],
			[ '', 'news:*.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ],
			[ '', 'git://github.com/prwef/abc-def.git', 'git://github.com/prwef/abc-def.git' ],
			[ 'git://', 'github.com/', 'git://github.com/prwef/abc-def.git' ],
			[ 'git://', '*.github.com/', 'git://a.b.c.d.e.f.github.com/prwef/abc-def.git' ],
			[ '', 'gopher://*.test.com/', 'gopher://gopher.test.com/0/v2/vstat' ],
			[ 'telnet://', '*.test.com', 'telnet://shell.test.com/~home/' ],
			[ '', 'http://test.com', 'http://test.com/index?arg=1' ],
			[ 'http://', '*.test.com', 'http://www.test.com/index?arg=1' ],
			[ '',
				'http://xx23124:__ffdfdef__@www.test.com:12345/dir',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			],

			// Tests for false positives
			[ 'http://', 'test.com', 'http://www.test.com', false ],
			[ 'http://', 'www1.test.com', 'http://www.test.com', false ],
			[ 'http://', '*.test.com', 'http://www.test.t.com', false ],
			[ '', 'http://test.com:8080', 'http://www.test.com:8080', false ],
			[ '', 'https://test.com', 'http://test.com', false ],
			[ '', 'http://test.com', 'https://test.com', false ],
			[ 'http://', 'http://test.com', 'http://test.com', false ],
			[ null, 'http://www.test.com', 'http://www.test.com:80', false ],
			[ null, 'http://www.test.com:80', 'http://www.test.com', false ],
			[ null, 'http://*.test.com:80', 'http://www.test.com', false ],
			[ '', 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z', false ],
			[ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z', false ],
			[ 'mailto:', '@test.com', '@abc.test.com', false ],
			[ 'mailto:', 'mail@test.com', 'mail2@test.com', false ],
			[ '', 'mailto:mail@test.com', 'mail2@test.com', false ],
			[ '', 'mailto:@test.com', '@abc.test.com', false ],
			[ 'ftp://', '*.co', 'ftp://www.co.uk', false ],
			[ 'ftp://', '*.co', 'ftp://www.co.m', false ],
			[ 'ftp://', '*.co/dir/', 'ftp://www.co/dir2/', false ],
			[ 'ftp://', 'www.co/dir/', 'ftp://www.co/dir2/', false ],
			[ 'ftp://', 'test.com/dir/', 'ftp://test.com/', false ],
			[ '', 'http://test.com:8080/dir/', 'http://test.com:808/dir/', false ],
			[ '', 'http://test.com/dir/index.html', 'http://test.com/dir/index.php', false ],

			// These are false positives too and ideally shouldn't match, but that
			// would require using regexes and RLIKE instead of LIKE
			// [ null, 'http://*.test.com', 'http://www.test.com:80', false ],
			// [ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
			// 	'https://gerrit.wikimedia.org/XXX/r/#/q/status:open,n,z', false ],
		];
	}

	/**
	 * testMakeLikeArrayWithValidPatterns()
	 *
	 * Tests whether the LIKE clause produced by LinkFilter::makeLikeArray($pattern, $protocol)
	 * will find one of the URL indexes produced by wfMakeUrlIndexes($url)
	 *
	 * @dataProvider provideValidPatterns
	 *
	 * @param string $protocol Protocol, e.g. 'http://' or 'mailto:'
	 * @param string $pattern Search pattern to feed to LinkFilter::makeLikeArray
	 * @param string $url URL to feed to wfMakeUrlIndexes
	 * @param bool $shouldBeFound Should the URL be found? (defaults true)
	 */
	public function testMakeLikeArrayWithValidPatterns( $protocol, $pattern, $url, $shouldBeFound = true ) {
		$indexes = wfMakeUrlIndexes( $url );
		$likeArray = LinkFilter::makeLikeArray( $pattern, $protocol );

		$this->assertNotFalse(
			$likeArray,
			"LinkFilter::makeLikeArray('$pattern', '$protocol') returned false on a valid pattern"
		);

		$regex = $this->createRegexFromLIKE( $likeArray );
		$debugmsg = "Regex: '" . $regex . "'\n";
		$debugmsg .= count( $indexes ) . " index(es) created by wfMakeUrlIndexes():\n";

		$matches = 0;

		foreach ( $indexes as $index ) {
			$result = preg_match( $regex, $index );
			// preg_match() returns false on a PCRE error. Without this check the
			// error would be counted as "no match" and every negative case
			// would pass without the regex having actually been evaluated.
			$this->assertNotFalse(
				$result,
				"preg_match() failed with regex '$regex' on index '$index'"
			);
			$matches += $result;
			$debugmsg .= "\t'$index'\n";
		}

		if ( $shouldBeFound ) {
			$this->assertGreaterThan(
				0,
				$matches,
				"Search pattern '$protocol$pattern' does not find url '$url' \n$debugmsg"
			);
		} else {
			$this->assertSame(
				0,
				$matches,
				"Search pattern '$protocol$pattern' should not find url '$url' \n$debugmsg"
			);
		}
	}

	/**
	 * provideInvalidPatterns()
	 *
	 * Each row holds a single search pattern that LinkFilter::makeLikeArray()
	 * is expected to reject.
	 *
	 * @return array
	 */
	public static function provideInvalidPatterns() {
		return [
			[ '' ],
			[ '*' ],
			[ 'http://*' ],
			[ 'http://*/' ],
			[ 'http://*/dir/file' ],
			[ 'test.*.com' ],
			[ 'http://test.*.com' ],
			[ 'test.*.com' ],
			[ 'http://*.test.*' ],
			[ 'http://*test.com' ],
			[ 'https://*' ],
			[ '*://test.com' ],
			[ 'mailto:name:pass@t*est.com' ],
			[ 'http://*:888/' ],
			[ '*http://' ],
			[ 'test.com/*/index' ],
			[ 'test.com/dir/index?arg=*' ],
		];
	}

	/**
	 * testMakeLikeArrayWithInvalidPatterns()
	 *
	 * Tests whether LinkFilter::makeLikeArray($pattern) will reject invalid search patterns
	 *
	 * @dataProvider provideInvalidPatterns
	 *
	 * @param string $pattern Invalid search pattern
	 */
	public function testMakeLikeArrayWithInvalidPatterns( $pattern ) {
		$this->assertFalse(
			LinkFilter::makeLikeArray( $pattern ),
			"'$pattern' is not a valid pattern and should be rejected"
		);
	}
}