3 * base include file for SimpleTest
5 * @subpackage WebTester
6 * @version $Id: url.php 2011 2011-04-29 08:22:48Z pp11 $
10 * include other SimpleTest class files
12 require_once(dirname(__FILE__
) . '/encoding.php');
16 * URL parser to replace parse_url() PHP function which
17 * got broken in PHP 4.3.0. Adds some browser specific
18 * functionality such as expandomatics.
19 * Guesses a bit trying to separate the host from
20 * the path and tries to keep a raw, possibly unparsable,
21 * request string as long as possible.
23 * @subpackage WebTester
40 * Constructor. Parses URL into sections.
41 * @param string $url Incoming URL.
44 function __construct($url = '') {
45 list($x, $y) = $this->chompCoordinates($url);
46 $this->setCoordinates($x, $y);
47 $this->scheme
= $this->chompScheme($url);
48 if ($this->scheme
=== 'file') {
49 // Unescaped backslashes not used in directory separator context
50 // will get caught by this, but they should have been urlencoded
51 // anyway so we don't care. If this ends up being a problem, the
52 // host regexp must be modified to match for backslashes when
53 // the scheme is file.
54 $url = str_replace('\\', '/', $url);
56 list($this->username
, $this->password
) = $this->chompLogin($url);
57 $this->host
= $this->chompHost($url);
59 if (preg_match('/(.*?):(.*)/', $this->host
, $host_parts)) {
60 if ($this->scheme
=== 'file' && strlen($this->host
) === 2) {
61 // DOS drive was placed in authority; promote it to path.
62 $url = '/' . $this->host
. $url;
65 $this->host
= $host_parts[1];
66 $this->port
= (integer)$host_parts[2];
69 $this->path
= $this->chompPath($url);
70 $this->request
= $this->parseRequest($this->chompRequest($url));
71 $this->fragment
= (strncmp($url, "#", 1) == 0 ?
substr($url, 1) : false);
72 $this->target
= false;
76 * Extracts the X, Y coordinate pair from an image map.
77 * @param string $url URL so far. The coordinates will be
79 * @return array X, Y as a pair of integers.
82 protected function chompCoordinates(&$url) {
83 if (preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
85 return array((integer)$matches[2], (integer)$matches[3]);
87 return array(false, false);
91 * Extracts the scheme part of an incoming URL.
92 * @param string $url URL so far. The scheme will be
94 * @return string Scheme part or false.
97 protected function chompScheme(&$url) {
98 if (preg_match('#^([^/:]*):(//)(.*)#', $url, $matches)) {
99 $url = $matches[2] . $matches[3];
106 * Extracts the username and password from the
107 * incoming URL. The // prefix will be reattached
108 * to the URL after the doublet is extracted.
109 * @param string $url URL so far. The username and
110 * password are removed.
111 * @return array Two item list of username and
112 * password. Will urldecode() them.
115 protected function chompLogin(&$url) {
117 if (preg_match('#^(//)(.*)#', $url, $matches)) {
118 $prefix = $matches[1];
121 if (preg_match('#^([^/]*)@(.*)#', $url, $matches)) {
122 $url = $prefix . $matches[2];
123 $parts = explode(":", $matches[1]);
125 urldecode($parts[0]),
126 isset($parts[1]) ?
urldecode($parts[1]) : false);
128 $url = $prefix . $url;
129 return array(false, false);
133 * Extracts the host part of an incoming URL.
134 * Includes the port number part. Will extract
135 * the host if it starts with // or it has
136 * a top level domain or it has at least two
138 * @param string $url URL so far. The host will be
140 * @return string Host part guess or false.
143 protected function chompHost(&$url) {
144 if (preg_match('!^(//)(.*?)(/.*|\?.*|#.*|$)!', $url, $matches)) {
148 if (preg_match('!(.*?)(\.\./|\./|/|\?|#|$)(.*)!', $url, $matches)) {
149 $tlds = SimpleUrl
::getAllTopLevelDomains();
150 if (preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $matches[1])) {
151 $url = $matches[2] . $matches[3];
153 } elseif (preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $matches[1])) {
154 $url = $matches[2] . $matches[3];
/**
 *    Extracts the path information from the incoming
 *    URL. Strips this path from the URL, leaving any
 *    "?request" and "#fragment" parts behind.
 *    @param string $url    URL so far. The path will be
 *                          removed.
 *    @return string        Path part, or '' if there is none.
 *    @access private
 */
protected function chompPath(&$url) {
    if (preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
        $url = $matches[2] . $matches[3];
        // Return the capture as is. A truthiness test here would
        // turn the perfectly valid path "0" into an empty string.
        return $matches[1];
    }
    // The pattern accepts any string, so this is only reached
    // if the regular expression engine itself fails.
    return '';
}
178 * Strips off the request data.
179 * @param string $url URL so far. The request will be
181 * @return string Raw request part.
184 protected function chompRequest(&$url) {
185 if (preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
186 $url = $matches[2] . $matches[3];
193 * Breaks the request down into an object.
194 * @param string $raw Raw request.
195 * @return SimpleFormEncoding Parsed data.
198 protected function parseRequest($raw) {
200 $request = new SimpleGetEncoding();
201 foreach (explode("&", $raw) as $pair) {
202 if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
203 $request->add(urldecode($matches[1]), urldecode($matches[2]));
205 $request->add(urldecode($pair), '');
/**
 *    Accessor for the protocol part of the URL.
 *    @param string $default    Value to hand back when the
 *                              stored scheme is empty.
 *    @return string            Scheme name, e.g "http", or
 *                              the supplied default.
 *    @access public
 */
function getScheme($default = false) {
    // Any falsy scheme is treated as "not present".
    if ($this->scheme) {
        return $this->scheme;
    }
    return $default;
}
/**
 *    Accessor for the user name.
 *    @return string/boolean    Username preceding the host, or
 *                              false if the URL carried no login.
 *    @access public
 */
function getUsername() {
    $username = $this->username;
    return $username;
}
/**
 *    Accessor for the password.
 *    @return string/boolean    Password preceding the host, or
 *                              false if the login had none.
 *    @access public
 */
function getPassword() {
    $password = $this->password;
    return $password;
}
/**
 *    Accessor for the host name. Any port number has
 *    already been split off by the constructor.
 *    @param string $default    Value to hand back when the
 *                              stored host is empty.
 *    @return string            Hostname only, or the
 *                              supplied default.
 *    @access public
 */
function getHost($default = false) {
    // Any falsy host is treated as "not present".
    if ($this->host) {
        return $this->host;
    }
    return $default;
}
250 * Accessor for top level domain.
251 * @return string Last part of host.
255 $path_parts = pathinfo($this->getHost());
256 return (isset($path_parts['extension']) ?
$path_parts['extension'] : false);
260 * Accessor for port number.
261 * @return integer TCP/IP port number.
270 * @return string Full path including leading slash if implied.
274 if (! $this->path
&& $this->host
) {
281 * Accessor for page if any. This may be a
282 * directory name if ambiguous.
287 if (! preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
294 * Gets the path to the page.
295 * @return string Path less the page.
298 function getBasePath() {
299 if (! preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
/**
 *    Accessor for the fragment at the end of the URL,
 *    that is the text following the "#".
 *    @return string/boolean    Part after "#", or false if
 *                              the URL had no fragment.
 *    @access public
 */
function getFragment() {
    $fragment = $this->fragment;
    return $fragment;
}
/**
 *    Sets the image coordinates. Passing false for either
 *    value clears both of them.
 *    @param integer $x    Horizontal position.
 *    @param integer $y    Vertical position.
 *    @access public
 */
function setCoordinates($x = false, $y = false) {
    // A missing half invalidates the whole pair.
    $cleared = ($x === false) || ($y === false);
    $this->x = $cleared ? false : (int)$x;
    $this->y = $cleared ? false : (int)$y;
}
331 * Accessor for horizontal image coordinate.
332 * @return integer X value.
340 * Accessor for vertical image coordinate.
341 * @return integer Y value.
349 * Accessor for current request parameters
350 * in URL string form. Will return the original request
351 * if at all possible even if it doesn't make much
353 * @return string Form is string "?a=1&b=2", etc.
356 function getEncodedRequest() {
358 $encoded = $this->raw
;
360 $encoded = $this->request
->asUrlRequest();
363 return '?' . preg_replace('/^\?/', '', $encoded);
369 * Adds an additional parameter to the request.
370 * @param string $key Name of parameter.
371 * @param string $value Value as string.
374 function addRequestParameter($key, $value) {
376 $this->request
->add($key, $value);
380 * Adds additional parameters to the request.
381 * @param hash/SimpleFormEncoding $parameters Additional
385 function addRequestParameters($parameters) {
387 $this->request
->merge($parameters);
391 * Clears down all parameters.
394 function clearRequest() {
396 $this->request
= new SimpleGetEncoding();
/**
 *    Gets the frame target if present. Although not
 *    strictly part of the URL specification, it acts
 *    similarly to the browser.
 *    @return boolean/string    Frame name or false if none.
 *    @access public
 */
function getTarget() {
    $frame = $this->target;
    return $frame;
}
411 * Attaches a frame target.
412 * @param string $frame Name of frame.
415 function setTarget($frame) {
417 $this->target
= $frame;
421 * Renders the URL back into a string.
422 * @return string URL in canonical form.
425 function asString() {
427 $scheme = $identity = $host = $port = $encoded = $fragment = '';
428 if ($this->username
&& $this->password
) {
429 $identity = $this->username
. ':' . $this->password
. '@';
431 if ($this->getHost()) {
432 $scheme = $this->getScheme() ?
$this->getScheme() : 'http';
434 $host = $this->getHost();
435 } elseif ($this->getScheme() === 'file') {
436 // Safest way; otherwise, file URLs on Windows have an extra
437 // leading slash. It might be possible to convert file://
438 // URIs to local file paths, but that requires more research.
441 if ($this->getPort() && $this->getPort() != 80 ) {
442 $port = ':'.$this->getPort();
445 if (substr($this->path
, 0, 1) == '/') {
446 $path = $this->normalisePath($this->path
);
448 $encoded = $this->getEncodedRequest();
449 $fragment = $this->getFragment() ?
'#'. $this->getFragment() : '';
450 $coords = $this->getX() === false ?
'' : '?' . $this->getX() . ',' . $this->getY();
451 return "$scheme$identity$host$port$path$encoded$fragment$coords";
455 * Replaces unknown sections to turn a relative
456 * URL into an absolute one. The base URL can
457 * be either a string or a SimpleUrl object.
458 * @param string/SimpleUrl $base Base URL.
461 function makeAbsolute($base) {
462 if (! is_object($base)) {
463 $base = new SimpleUrl($base);
465 if ($this->getHost()) {
466 $scheme = $this->getScheme();
467 $host = $this->getHost();
468 $port = $this->getPort() ?
':' . $this->getPort() : '';
469 $identity = $this->getIdentity() ?
$this->getIdentity() . '@' : '';
471 $identity = $base->getIdentity() ?
$base->getIdentity() . '@' : '';
474 $scheme = $base->getScheme();
475 $host = $base->getHost();
476 $port = $base->getPort() ?
':' . $base->getPort() : '';
477 $identity = $base->getIdentity() ?
$base->getIdentity() . '@' : '';
479 $path = $this->normalisePath($this->extractAbsolutePath($base));
480 $encoded = $this->getEncodedRequest();
481 $fragment = $this->getFragment() ?
'#'. $this->getFragment() : '';
482 $coords = $this->getX() === false ?
'' : '?' . $this->getX() . ',' . $this->getY();
483 return new SimpleUrl("$scheme://$identity$host$port$path$encoded$fragment$coords");
487 * Replaces unknown sections of the path with base parts
488 * to return a complete absolute one.
489 * @param string/SimpleUrl $base Base URL.
490 * @return string Absolute path.
493 protected function extractAbsolutePath($base) {
494 if ($this->getHost()) {
497 if (! $this->isRelativePath($this->path
)) {
501 return $base->getBasePath() . $this->path
;
503 return $base->getPath();
/**
 *    Simple test to see if a path part is relative.
 *    @param string $path    Path to test.
 *    @return boolean        True if the path does not start
 *                           with a "/", which includes the
 *                           empty path.
 *    @access private
 */
protected function isRelativePath($path) {
    $is_absolute = (strncmp($path, '/', 1) === 0);
    return ! $is_absolute;
}
/**
 *    Combines the username and password into the form
 *    used when rendering the URL.
 *    @return string/boolean    Form of username:password, or
 *                              false unless both parts are set.
 *    @access public
 */
function getIdentity() {
    // Both halves are required; one without the other is dropped.
    if (! $this->username || ! $this->password) {
        return false;
    }
    return $this->username . ':' . $this->password;
}
/**
 *    Replaces . and .. sections of the path.
 *    @param string $path    Unoptimised path.
 *    @return string         Path with dots removed if possible.
 *    @access public
 */
function normalisePath($path) {
    // preg_replace() never rescans text it has just produced, so a
    // single pass leaves "/a/././b" as "/a/./b". Repeat each pass
    // until the path stops changing. Every replacement shortens
    // the string, so both loops are guaranteed to finish.
    do {
        $before = $path;
        $path = preg_replace('|/\./|', '/', $path);
    } while ($path !== $before);

    // Collapse "dir/.." pairs, innermost first, so that
    // "/a/b/../../c" ends up as "/c" rather than "/a/../c".
    do {
        $before = $path;
        $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
    } while ($path !== $before);

    return $path;
}
/**
 *    A pipe separated list of the top level domains that
 *    are recognised when guessing whether the start of a
 *    URL is a host name. The result is ready to drop into
 *    a regular expression alternation.
 *    @return string    Pipe separated list.
 *    @access public
 *    @static
 */
static function getAllTopLevelDomains() {
    $tlds = array(
        'com', 'edu', 'net', 'org', 'gov', 'mil', 'int',
        'biz', 'info', 'name', 'pro', 'aero', 'coop', 'museum');
    return implode('|', $tlds);
}