3 * base include file for SimpleTest
5 * @subpackage WebTester
6 * @version $Id: url.php,v 1.29 2006/01/14 02:45:34 lastcraft Exp $
10 * include other SimpleTest class files
12 require_once(dirname(__FILE__
) . '/encoding.php');
16 * URL parser to replace parse_url() PHP function which
17 * got broken in PHP 4.3.0. Adds some browser specific
18 * functionality such as expandomatics.
19 * Guesses a bit trying to separate the host from
20 * the path and tries to keep a raw, possibly unparsable,
21 * request string as long as possible.
23 * @subpackage WebTester
40 * Constructor. Parses URL into sections.
41 * @param string $url Incoming URL.
44 function SimpleUrl($url) {
45 list($x, $y) = $this->_chompCoordinates($url);
46 $this->setCoordinates($x, $y);
47 $this->_scheme
= $this->_chompScheme($url);
48 list($this->_username
, $this->_password
) = $this->_chompLogin($url);
49 $this->_host
= $this->_chompHost($url);
51 if (preg_match('/(.*?):(.*)/', $this->_host
, $host_parts)) {
52 $this->_host
= $host_parts[1];
53 $this->_port
= (integer)$host_parts[2];
55 $this->_path
= $this->_chompPath($url);
56 $this->_request
= $this->_parseRequest($this->_chompRequest($url));
57 $this->_fragment
= (strncmp($url, "#", 1) == 0 ?
substr($url, 1) : false);
58 $this->_target
= false;
62 * Extracts the X, Y coordinate pair from an image map.
63 * @param string $url URL so far. The coordinates will be
65 * @return array X, Y as a pair of integers.
/**
 * Pulls a trailing "?x,y" image map coordinate pair off the URL.
 *
 * @param string $url    URL so far, passed by reference. When a
 *                       coordinate pair is found it is cut off,
 *                       leaving only the text in front of the "?".
 * @return array         X, Y as a pair of integers, or a pair of
 *                       false values when the URL has no coordinates.
 * @access private
 */
function _chompCoordinates(&$url) {
    $found = preg_match('/(.*)\?(\d+),(\d+)$/', $url, $pieces);
    if (! $found) {
        return array(false, false);
    }
    // Everything in front of the final "?x,y" is what is left to parse.
    $url = $pieces[1];
    $x = (int)$pieces[2];
    $y = (int)$pieces[3];
    return array($x, $y);
}
77 * Extracts the scheme part of an incoming URL.
78 * @param string $url URL so far. The scheme will be
80 * @return string Scheme part or false.
/**
 * Pulls the scheme (the part in front of "://") off the URL.
 *
 * @param string $url    URL so far, passed by reference. The scheme
 *                       and its colon are removed; the "//" is kept
 *                       at the front of what remains.
 * @return string        Scheme part, or false if there is none.
 * @access private
 */
function _chompScheme(&$url) {
    if (! preg_match('/(.*?):(\/\/)(.*)/', $url, $found)) {
        return false;
    }
    list(, $scheme, $slashes, $remainder) = $found;
    // The double slash stays on the URL so later stages can spot the host.
    $url = $slashes . $remainder;
    return $scheme;
}
92 * Extracts the username and password from the
93 * incoming URL. The // prefix will be reattached
94 * to the URL after the doublet is extracted.
95 * @param string $url URL so far. The username and
96 * password are removed.
97 * @return array Two item list of username and
98 * password. Will urldecode() them.
/**
 * Extracts the username and password from the incoming URL.
 * A leading "//" is set aside while looking for the login and
 * reattached to the URL afterwards.
 *
 * Only text in front of the first "/", "?" or "#" is considered,
 * so an "@" inside a path or query string is not mistaken for a
 * login separator. The first ":" splits username from password;
 * any further colons belong to the password.
 *
 * @param string $url    URL so far, passed by reference. The
 *                       username and password are removed.
 * @return array         Two item list of username and password,
 *                       both urldecode()d. The password is false
 *                       if absent; both are false if there is no
 *                       login at all.
 * @access private
 */
function _chompLogin(&$url) {
    $prefix = '';
    if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
        $prefix = $matches[1];
        $url = $matches[2];
    }
    // Anchor the login to the authority part of the URL.
    if (preg_match('/^([^\/?#@]*)@(.*)/', $url, $matches)) {
        $url = $prefix . $matches[2];
        // explode() replaces split(), which no longer exists in PHP 7+.
        $parts = explode(':', $matches[1], 2);
        return array(
                urldecode($parts[0]),
                isset($parts[1]) ? urldecode($parts[1]) : false);
    }
    $url = $prefix . $url;
    return array(false, false);
}
119 * Extracts the host part of an incoming URL.
120 * Includes the port number part. Will extract
121 * the host if it starts with // or it has
122 * a top level domain or it has at least two
124 * @param string $url URL so far. The host will be
126 * @return string Host part guess or false.
129 function _chompHost(&$url) {
130 if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
134 if (preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
135 $tlds = SimpleUrl
::getAllTopLevelDomains();
136 if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
137 $url = $matches[2] . $matches[3];
139 } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
140 $url = $matches[2] . $matches[3];
148 * Extracts the path information from the incoming
149 * URL. Strips this path from the URL.
150 * @param string $url URL so far. The host will be
152 * @return string Path part or '/'.
/**
 * Pulls the path off the front of the URL, stopping at the
 * first "?" or "#" or at the end of the string.
 *
 * @param string $url    URL so far, passed by reference. The path
 *                       is removed, leaving any query and fragment.
 * @return string        Path part, or an empty string when the
 *                       path is missing or evaluates as empty.
 * @access private
 */
function _chompPath(&$url) {
    if (! preg_match('/(.*?)(\?|#|$)(.*)/', $url, $found)) {
        return '';
    }
    $url = $found[2] . $found[3];
    if ($found[1]) {
        return $found[1];
    }
    return '';
}
164 * Strips off the request data.
165 * @param string $url URL so far. The request will be
167 * @return string Raw request part.
170 function _chompRequest(&$url) {
171 if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
172 $url = $matches[2] . $matches[3];
179 * Breaks the request down into an object.
180 * @param string $raw Raw request.
181 * @return SimpleFormEncoding Parsed data.
184 function _parseRequest($raw) {
186 $request = new SimpleGetEncoding();
187 foreach (split("&", $raw) as $pair) {
188 if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
189 $request->add($matches[1], urldecode($matches[2]));
191 $request->add($pair, '');
198 * Accessor for protocol part.
199 * @param string $default Value to use if not present.
200 * @return string Scheme name, e.g "http".
/**
 * Accessor for the protocol part.
 *
 * @param string $default    Value to hand back when the URL
 *                           carried no scheme.
 * @return string            Scheme name, e.g "http", or $default.
 * @access public
 */
function getScheme($default = false) {
    if ($this->_scheme) {
        return $this->_scheme;
    }
    return $default;
}
208 * Accessor for user name.
209 * @return string Username preceding host.
/**
 * Accessor for the user name.
 *
 * @return string    Username stored for this URL.
 * @access public
 */
function getUsername() {
    $username = $this->_username;
    return $username;
}
217 * Accessor for password.
218 * @return string Password preceding host.
/**
 * Accessor for the password.
 *
 * @return string    Password stored for this URL.
 * @access public
 */
function getPassword() {
    $password = $this->_password;
    return $password;
}
226 * Accessor for hostname and port.
227 * @param string $default Value to use if not present.
228 * @return string Hostname only.
/**
 * Accessor for the hostname.
 *
 * @param string $default    Value to hand back when the URL
 *                           carried no host.
 * @return string            Stored host, or $default.
 * @access public
 */
function getHost($default = false) {
    if ($this->_host) {
        return $this->_host;
    }
    return $default;
}
236 * Accessor for top level domain.
237 * @return string Last part of host.
241 $path_parts = pathinfo($this->getHost());
242 return (isset($path_parts['extension']) ?
$path_parts['extension'] : false);
246 * Accessor for port number.
247 * @return integer TCP/IP port number.
256 * @return string Full path including leading slash if implied.
260 if (! $this->_path
&& $this->_host
) {
267 * Accessor for page if any. This may be a
268 * directory name if ambiguous.
273 if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
280 * Gets the path to the page.
281 * @return string Path less the page.
284 function getBasePath() {
285 if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
292 * Accessor for fragment at end of URL after the "#".
293 * @return string Part after "#".
/**
 * Accessor for the fragment at the end of the URL.
 *
 * @return string    Stored fragment (the part after the "#").
 * @access public
 */
function getFragment() {
    $fragment = $this->_fragment;
    return $fragment;
}
301 * Sets image coordinates. Set to false to clear
303 * @param integer $x Horizontal position.
304 * @param integer $y Vertical position.
/**
 * Sets the image coordinates. Passing false for either value
 * clears both of them.
 *
 * @param integer $x    Horizontal position.
 * @param integer $y    Vertical position.
 * @access public
 */
function setCoordinates($x = false, $y = false) {
    // A pair is only stored when both halves were supplied.
    $both_given = ($x !== false) && ($y !== false);
    $this->_x = $both_given ? (int)$x : false;
    $this->_y = $both_given ? (int)$y : false;
}
317 * Accessor for horizontal image coordinate.
318 * @return integer X value.
326 * Accessor for vertical image coordinate.
327 * @return integer Y value.
335 * Accessor for current request parameters
336 * in URL string form. Will return the original request
337 * if at all possible even if it doesn't make much
339 * @return string Form is string "?a=1&b=2", etc.
342 function getEncodedRequest() {
344 $encoded = $this->_raw
;
346 $encoded = $this->_request
->asUrlRequest();
349 return '?' . preg_replace('/^\?/', '', $encoded);
355 * Adds an additional parameter to the request.
356 * @param string $key Name of parameter.
357 * @param string $value Value as string.
360 function addRequestParameter($key, $value) {
362 $this->_request
->add($key, $value);
366 * Adds additional parameters to the request.
367 * @param hash/SimpleFormEncoding $parameters Additional
371 function addRequestParameters($parameters) {
373 $this->_request
->merge($parameters);
377 * Clears down all parameters.
380 function clearRequest() {
382 $this->_request
= &new SimpleGetEncoding();
386 * Gets the frame target if present. Although
387 * not strictly part of the URL specification it
388 * acts similarly to the browser.
389 * @return boolean/string Frame name or false if none.
/**
 * Gets the frame target attached to this URL.
 *
 * @return boolean/string    Stored frame target.
 * @access public
 */
function getTarget() {
    $target = $this->_target;
    return $target;
}
397 * Attaches a frame target.
398 * @param string $frame Name of frame.
401 function setTarget($frame) {
403 $this->_target
= $frame;
407 * Renders the URL back into a string.
408 * @return string URL in canonical form.
/**
 * Renders the URL back into a string.
 *
 * The pieces are emitted in the order scheme, login, host, port,
 * path, request, fragment and image coordinates. The port is
 * written after the host, in the same ":port" form that
 * makeAbsolute() uses, so that a URL with an explicit port keeps
 * it when rendered.
 *
 * @return string    URL in canonical form.
 * @access public
 */
function asString() {
    $scheme = $identity = $host = $port = $path = $encoded = $fragment = '';
    // A login is only rendered when both halves are present.
    if ($this->_username && $this->_password) {
        $identity = $this->_username . ':' . $this->_password . '@';
    }
    if ($this->getHost()) {
        // A host without a scheme is rendered as plain http.
        $scheme = $this->getScheme() ? $this->getScheme() : 'http';
        $host = $this->getHost();
        // A port only makes sense alongside a host.
        $port = $this->getPort() ? ':' . $this->getPort() : '';
    }
    // Only paths that start with a slash are rendered.
    if (substr($this->_path, 0, 1) == '/') {
        $path = $this->normalisePath($this->_path);
    }
    $encoded = $this->getEncodedRequest();
    $fragment = $this->getFragment() ? '#'. $this->getFragment() : '';
    $coords = $this->getX() === false ? '' : '?' . $this->getX() . ',' . $this->getY();
    return "$scheme://$identity$host$port$path$encoded$fragment$coords";
}
430 * Replaces unknown sections to turn a relative
431 * URL into an absolute one. The base URL can
432 * be either a string or a SimpleUrl object.
433 * @param string/SimpleUrl $base Base URL.
436 function makeAbsolute($base) {
437 if (! is_object($base)) {
438 $base = new SimpleUrl($base);
440 $scheme = $this->getScheme() ?
$this->getScheme() : $base->getScheme();
441 if ($this->getHost()) {
442 $host = $this->getHost();
443 $port = $this->getPort() ?
':' . $this->getPort() : '';
444 $identity = $this->getIdentity() ?
$this->getIdentity() . '@' : '';
446 $identity = $base->getIdentity() ?
$base->getIdentity() . '@' : '';
449 $host = $base->getHost();
450 $port = $base->getPort() ?
':' . $base->getPort() : '';
451 $identity = $base->getIdentity() ?
$base->getIdentity() . '@' : '';
453 $path = $this->normalisePath($this->_extractAbsolutePath($base));
454 $encoded = $this->getEncodedRequest();
455 $fragment = $this->getFragment() ?
'#'. $this->getFragment() : '';
456 $coords = $this->getX() === false ?
'' : '?' . $this->getX() . ',' . $this->getY();
457 return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
461 * Replaces unknown sections of the path with base parts
462 * to return a complete absolute one.
463 * @param string/SimpleUrl $base Base URL.
464 * @param string Absolute path.
467 function _extractAbsolutePath($base) {
468 if ($this->getHost()) {
471 if (! $this->_isRelativePath($this->_path
)) {
475 return $base->getBasePath() . $this->_path
;
477 return $base->getPath();
481 * Simple test to see if a path part is relative.
482 * @param string $path Path to test.
483 * @return boolean True if starts with a "/".
/**
 * Simple test to see if a path part is relative.
 *
 * @param string $path    Path to test.
 * @return boolean        True unless the path starts with a "/".
 * @access private
 */
function _isRelativePath($path) {
    $first = substr($path, 0, 1);
    $is_rooted = ($first == '/');
    return ! $is_rooted;
}
491 * Extracts the username and password for use in rendering
493 * @return string/boolean Form of username:password or false.
/**
 * Builds the login part of the URL for rendering.
 *
 * @return string/boolean    Form of username:password, or false
 *                           unless both are set.
 * @access public
 */
function getIdentity() {
    $complete = $this->_username && $this->_password;
    if (! $complete) {
        return false;
    }
    return $this->_username . ':' . $this->_password;
}
504 * Replaces . and .. sections of the path.
505 * @param string $path Unoptimised path.
506 * @return string Path with dots removed if possible.
/**
 * Replaces . and .. sections of the path.
 *
 * Each kind of section is collapsed repeatedly until the path
 * stops changing, because preg_replace() never rescans text it
 * has already consumed and so a single pass misses adjacent
 * sections such as "/././" or "/b/../../". The "." sections are
 * removed first so that a "." is never treated as the directory
 * that a following ".." cancels.
 *
 * @param string $path    Unoptimised path.
 * @return string         Path with dots removed if possible.
 * @access public
 */
function normalisePath($path) {
    // Drop every "/./" section.
    do {
        $before = $path;
        $path = preg_replace('|/\./|', '/', $path);
    } while ($path !== $before);
    // Cancel each "/dir/../" pair, innermost first.
    do {
        $before = $path;
        $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
    } while ($path !== $before);
    return $path;
}
515 * A pipe separated list of all TLDs that result in two part
517 * @return string Pipe separated list.
/**
 * Lists the top level domains this class recognises, in a form
 * that can be dropped straight into a regular expression
 * alternation.
 *
 * @return string    Pipe separated list.
 * @access public
 * @static
 */
function getAllTopLevelDomains() {
    $domains = array(
            'com', 'edu', 'net', 'org', 'gov', 'mil', 'int',
            'biz', 'info', 'name', 'pro', 'aero', 'coop', 'museum');
    return implode('|', $domains);
}