Import: Handle uploads with sha1 starting with 0 properly
[mediawiki.git] / tests / phpunit / includes / LinkFilterTest.php
blob9093564fde11a9305bc5224531e4702fe67bd883
<?php

/**
 * Tests that the LIKE patterns built by LinkFilter::makeLikeArray() match
 * (or do not match) the URL indexes built by wfMakeUrlIndexes(), and that
 * invalid search patterns are rejected.
 *
 * @group Database
 */
class LinkFilterTest extends MediaWikiLangTestCase {

	/**
	 * Overrides the wgUrlProtocols global with a fixed list of protocols
	 * (via setMwGlobals()) before each test.
	 */
	protected function setUp() {
		parent::setUp();

		$this->setMwGlobals( 'wgUrlProtocols', array(
			'http://',
			'https://',
			'ftp://',
			'irc://',
			'ircs://',
			'gopher://',
			'telnet://',
			'nntp://',
			'worldwind://',
			'mailto:',
			'news:',
			'svn://',
			'git://',
			'mms://',
			'//',
		) );
	}

	/**
	 * createRegexFromLIKE( $like )
	 *
	 * Takes an array as created by LinkFilter::makeLikeArray() and creates a regex from it.
	 *
	 * LikeMatch items whose string form is '%' become '.*', those whose string
	 * form is '_' become '.'; any other LikeMatch contributes nothing. Every
	 * non-LikeMatch item is treated as literal text and quoted for use inside
	 * a '!'-delimited pattern. The result is anchored at both ends.
	 *
	 * @param array $like Array as created by LinkFilter::makeLikeArray()
	 * @return string Regex
	 */
	public function createRegexFromLIKE( $like ) {
		$regex = '!^';

		foreach ( $like as $item ) {
			if ( $item instanceof LikeMatch ) {
				$wildcard = $item->toString();
				if ( $wildcard === '%' ) {
					$regex .= '.*';
				} elseif ( $wildcard === '_' ) {
					$regex .= '.';
				}
			} else {
				// '!' is the delimiter used above, so it must be escaped too
				$regex .= preg_quote( $item, '!' );
			}
		}

		$regex .= '$!';

		return $regex;
	}

	/**
	 * provideValidPatterns()
	 *
	 * Each row is: protocol, search pattern, URL, and optionally `false` when
	 * the pattern is valid but must NOT find the URL.
	 *
	 * @return array
	 */
	public static function provideValidPatterns() {
		return array(
			// Protocol, Search pattern, URL which matches the pattern
			array( 'http://', '*.test.com', 'http://www.test.com' ),
			array( 'http://', 'test.com:8080/dir/file', 'http://name:pass@test.com:8080/dir/file' ),
			array( 'https://', '*.com', 'https://s.s.test..com:88/dir/file?a=1&b=2' ),
			array( 'https://', '*.com', 'https://name:pass@secure.com/index.html' ),
			array( 'http://', 'name:pass@test.com', 'http://test.com' ),
			array( 'http://', 'test.com', 'http://name:pass@test.com' ),
			array( 'http://', '*.test.com', 'http://a.b.c.test.com/dir/dir/file?a=6' ),
			array( null, 'http://*.test.com', 'http://www.test.com' ),
			array( 'mailto:', 'name@mail.test123.com', 'mailto:name@mail.test123.com' ),
			array( '',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			),
			array( '', 'http://name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ),
			array( '', 'http://name:wrongpass@*.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ),
			array( 'http://', 'name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ),
			array( '', 'http://name:pass@www.test.com:12345',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ),
			array( 'ftp://', 'user:pass@ftp.test.com:1233/home/user/file;type=efw',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ),
			array( null, 'ftp://otheruser:otherpass@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ),
			array( null, 'ftp://@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ),
			array( null, 'ftp://ftp.test.com/',
				'ftp://user:pass@ftp.test.com/home/user/file;type=efw' ),
			array( null, 'ftp://ftp.test.com/',
				'ftp://user:pass@ftp.test.com/home/user/file;type=efw' ),
			array( null, 'ftp://*.test.com:222/',
				'ftp://user:pass@ftp.test.com:222/home' ),
			array( 'irc://', '*.myserver:6667/', 'irc://test.myserver:6667/' ),
			array( 'irc://', 'name:pass@*.myserver/', 'irc://test.myserver:6667/' ),
			array( 'irc://', 'name:pass@*.myserver/', 'irc://other:@test.myserver:6667/' ),
			array( '', 'irc://test/name,string,abc?msg=t', 'irc://test/name,string,abc?msg=test' ),
			array( '', 'https://gerrit.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ),
			array( '', 'https://gerrit.wikimedia.org',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ),
			array( 'mailto:', '*.test.com', 'mailto:name@pop3.test.com' ),
			array( 'mailto:', 'test.com', 'mailto:name@test.com' ),
			array( 'news:', 'test.1234afc@news.test.com', 'news:test.1234afc@news.test.com' ),
			array( 'news:', '*.test.com', 'news:test.1234afc@news.test.com' ),
			array( '', 'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ),
			array( '', 'news:*.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ),
			array( '', 'git://github.com/prwef/abc-def.git', 'git://github.com/prwef/abc-def.git' ),
			array( 'git://', 'github.com/', 'git://github.com/prwef/abc-def.git' ),
			array( 'git://', '*.github.com/', 'git://a.b.c.d.e.f.github.com/prwef/abc-def.git' ),
			array( '', 'gopher://*.test.com/', 'gopher://gopher.test.com/0/v2/vstat' ),
			array( 'telnet://', '*.test.com', 'telnet://shell.test.com/~home/' ),

			// The following only work in PHP >= 5.3.7, due to a bug in parse_url which eats
			// the path from the url (https://bugs.php.net/bug.php?id=54180)
			// array( '', 'http://test.com', 'http://test.com/index?arg=1' ),
			// array( 'http://', '*.test.com', 'http://www.test.com/index?arg=1' ),
			// array( '' ,
			//   'http://xx23124:__ffdfdef__@www.test.com:12345/dir' ,
			//   'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			// ),

			// Tests for false positives
			array( 'http://', 'test.com', 'http://www.test.com', false ),
			array( 'http://', 'www1.test.com', 'http://www.test.com', false ),
			array( 'http://', '*.test.com', 'http://www.test.t.com', false ),
			array( '', 'http://test.com:8080', 'http://www.test.com:8080', false ),
			array( '', 'https://test.com', 'http://test.com', false ),
			array( '', 'http://test.com', 'https://test.com', false ),
			array( 'http://', 'http://test.com', 'http://test.com', false ),
			array( null, 'http://www.test.com', 'http://www.test.com:80', false ),
			array( null, 'http://www.test.com:80', 'http://www.test.com', false ),
			array( null, 'http://*.test.com:80', 'http://www.test.com', false ),
			array( '', 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z', false ),
			array( '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z', false ),
			array( 'mailto:', '@test.com', '@abc.test.com', false ),
			array( 'mailto:', 'mail@test.com', 'mail2@test.com', false ),
			array( '', 'mailto:mail@test.com', 'mail2@test.com', false ),
			array( '', 'mailto:@test.com', '@abc.test.com', false ),
			array( 'ftp://', '*.co', 'ftp://www.co.uk', false ),
			array( 'ftp://', '*.co', 'ftp://www.co.m', false ),
			array( 'ftp://', '*.co/dir/', 'ftp://www.co/dir2/', false ),
			array( 'ftp://', 'www.co/dir/', 'ftp://www.co/dir2/', false ),
			array( 'ftp://', 'test.com/dir/', 'ftp://test.com/', false ),
			array( '', 'http://test.com:8080/dir/', 'http://test.com:808/dir/', false ),
			array( '', 'http://test.com/dir/index.html', 'http://test.com/dir/index.php', false ),

			// These are false positives too and ideally shouldn't match, but that
			// would require using regexes and RLIKE instead of LIKE
			// array( null, 'http://*.test.com', 'http://www.test.com:80', false ),
			// array( '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
			//   'https://gerrit.wikimedia.org/XXX/r/#/q/status:open,n,z', false ),
		);
	}

	/**
	 * testMakeLikeArrayWithValidPatterns()
	 *
	 * Tests whether the LIKE clause produced by LinkFilter::makeLikeArray($pattern, $protocol)
	 * will find one of the URL indexes produced by wfMakeUrlIndexes($url)
	 *
	 * @dataProvider provideValidPatterns
	 *
	 * @param string $protocol Protocol, e.g. 'http://' or 'mailto:'
	 * @param string $pattern Search pattern to feed to LinkFilter::makeLikeArray
	 * @param string $url URL to feed to wfMakeUrlIndexes
	 * @param bool $shouldBeFound Should the URL be found? (defaults true)
	 */
	public function testMakeLikeArrayWithValidPatterns( $protocol, $pattern, $url, $shouldBeFound = true ) {
		$indexes = wfMakeUrlIndexes( $url );
		$likeArray = LinkFilter::makeLikeArray( $pattern, $protocol );

		$this->assertTrue( $likeArray !== false,
			"LinkFilter::makeLikeArray('$pattern', '$protocol') returned false on a valid pattern"
		);

		$regex = $this->createRegexFromLIKE( $likeArray );
		$debugmsg = "Regex: '" . $regex . "'\n";
		$debugmsg .= count( $indexes ) . " index(es) created by wfMakeUrlIndexes():\n";

		$matches = 0;

		foreach ( $indexes as $index ) {
			$result = preg_match( $regex, $index );
			// preg_match() returns false on error; without this check an error would be
			// counted as "no match" and let the negative cases pass vacuously.
			$this->assertNotSame( false, $result,
				"preg_match() failed for regex '$regex' on index '$index'"
			);
			$matches += $result;
			$debugmsg .= "\t'$index'\n";
		}

		if ( $shouldBeFound ) {
			$this->assertTrue(
				$matches > 0,
				"Search pattern '$protocol$pattern' does not find url '$url' \n$debugmsg"
			);
		} else {
			$this->assertFalse(
				$matches > 0,
				"Search pattern '$protocol$pattern' should not find url '$url' \n$debugmsg"
			);
		}
	}

	/**
	 * provideInvalidPatterns()
	 *
	 * Each row holds a single search pattern that is expected to be rejected.
	 *
	 * @return array
	 */
	public static function provideInvalidPatterns() {
		return array(
			array( '' ),
			array( '*' ),
			array( 'http://*' ),
			array( 'http://*/' ),
			array( 'http://*/dir/file' ),
			array( 'test.*.com' ),
			array( 'http://test.*.com' ),
			array( 'test.*.com' ),
			array( 'http://*.test.*' ),
			array( 'http://*test.com' ),
			array( 'https://*' ),
			array( '*://test.com' ),
			array( 'mailto:name:pass@t*est.com' ),
			array( 'http://*:888/' ),
			array( '*http://' ),
			array( 'test.com/*/index' ),
			array( 'test.com/dir/index?arg=*' ),
		);
	}

	/**
	 * testMakeLikeArrayWithInvalidPatterns()
	 *
	 * Tests whether LinkFilter::makeLikeArray($pattern) will reject invalid search patterns
	 *
	 * @dataProvider provideInvalidPatterns
	 *
	 * @param string $pattern Invalid search pattern
	 */
	public function testMakeLikeArrayWithInvalidPatterns( $pattern ) {
		$this->assertFalse(
			LinkFilter::makeLikeArray( $pattern ),
			"'$pattern' is not a valid pattern and should be rejected"
		);
	}
}