3 * Base include file for SimpleTest
5 * @subpackage WebTester
6 * @version $Id: page.php 1938 2009-08-05 17:16:23Z dgheath $
10 * include other SimpleTest class files
12 require_once(dirname(__FILE__
) . '/http.php');
13 require_once(dirname(__FILE__
) . '/php_parser.php');
14 require_once(dirname(__FILE__
) . '/tag.php');
15 require_once(dirname(__FILE__
) . '/form.php');
16 require_once(dirname(__FILE__
) . '/selector.php');
20 * A wrapper for a web page.
22 * @subpackage WebTester
25 private $links = array();
26 private $title = false;
29 private $forms = array();
30 private $frames = array();
31 private $transport_error;
33 private $text = false;
38 private $base = false;
39 private $request_data;
42 * Parses a page ready to access its contents.
43 * @param SimpleHttpResponse $response Result of HTTP fetch.
46 function __construct($response = false) {
48 $this->extractResponse($response);
/**
 * Copies everything of interest out of a fetched response and
 * stores it on this page: the transport error, the raw content,
 * the bytes sent, the headers, the request method, the URL and
 * the request data.
 *
 * @param SimpleHttpResponse $response    Response being parsed.
 */
protected function extractResponse($response) {
    $this->transport_error = $response->getError();
    $this->raw             = $response->getContent();
    $this->sent            = $response->getSent();
    $this->headers         = $response->getHeaders();
    $this->method          = $response->getMethod();
    $this->url             = $response->getUrl();
    $this->request_data    = $response->getRequestData();
}
70 * Sets up a missing response.
73 protected function noResponse() {
74 $this->transport_error
= 'No page fetched yet';
77 $this->headers
= false;
78 $this->method
= 'GET';
80 $this->request_data
= false;
84 * Original request as bytes sent down the wire.
85 * @return mixed Sent content.
88 function getRequest() {
93 * Accessor for raw text of page.
94 * @return string Raw unparsed content.
102 * Accessor for plain text of page as a text browser
104 * @return string Plain text of page.
109 $this->text
= SimplePage
::normalise($this->raw
);
115 * Accessor for raw headers of page.
116 * @return string Header block as text.
119 function getHeaders() {
120 if ($this->headers
) {
121 return $this->headers
->getRaw();
/**
 * Accessor for the method of the original request.
 *
 * @return string      GET, POST or HEAD.
 */
function getMethod() {
    return $this->method;
}
136 * Original resource name.
137 * @return SimpleUrl Current url.
145 * Base URL if set via BASE tag, page URL otherwise.
146 * @return SimpleUrl Base url.
149 function getBaseUrl() {
/**
 * Accessor for the data that went out with the original request.
 *
 * @return mixed      Sent content.
 */
function getRequestData() {
    return $this->request_data;
}
/**
 * Accessor for the stored transport error.
 *
 * @return string      Error from last response.
 */
function getTransportError() {
    return $this->transport_error;
}
172 * Accessor for current MIME type.
173 * @return string MIME type as string; e.g. 'text/html'
176 function getMimeType() {
177 if ($this->headers
) {
178 return $this->headers
->getMimeType();
184 * Accessor for HTTP response code.
185 * @return integer HTTP response code received.
188 function getResponseCode() {
189 if ($this->headers
) {
190 return $this->headers
->getResponseCode();
196 * Accessor for last Authentication type. Only valid
197 * straight after a challenge (401).
198 * @return string Description of challenge type.
201 function getAuthentication() {
202 if ($this->headers
) {
203 return $this->headers
->getAuthentication();
209 * Accessor for last Authentication realm. Only valid
210 * straight after a challenge (401).
211 * @return string Name of security realm.
214 function getRealm() {
215 if ($this->headers
) {
216 return $this->headers
->getRealm();
222 * Accessor for current frame focus. Will be
223 * false as no frames.
224 * @return array Always empty.
227 function getFrameFocus() {
232 * Sets the focus by index. The integer index starts from 1.
233 * @param integer $choice Chosen frame.
234 * @return boolean Always false.
237 function setFrameFocusByIndex($choice) {
242 * Sets the focus by name. Always fails for a leaf page.
243 * @param string $name Chosen frame.
244 * @return boolean False as no frames.
247 function setFrameFocus($name) {
252 * Clears the frame focus. Does nothing for a leaf page.
255 function clearFrameFocus() {
/**
 * Replaces the stored list of frames with the given one.
 *
 * @param array $frames    Frames for this page. Each entry is
 *                         later asked for its 'name' and 'src'
 *                         attributes via getAttribute().
 */
function setFrames($frames) {
    $this->frames = $frames;
}
/**
 * Test to see if a link is an absolute one, i.e. whether its
 * parsed form carries both a scheme and a host.
 *
 * @param string $url     Url to test.
 * @return boolean        True if absolute.
 */
protected function linkIsAbsolute($url) {
    $candidate = new SimpleUrl($url);
    // No scheme means it cannot be absolute; the host is not consulted.
    if (! $candidate->getScheme()) {
        return false;
    }
    return (bool) $candidate->getHost();
}
/**
 * Appends a link to the end of the list of links held by the page.
 *
 * @param SimpleAnchorTag $tag      Link to accept.
 */
function addLink($tag) {
    $this->links[] = $tag;
}
/**
 * Replaces the stored list of forms with the given one.
 *
 * @param array $forms      An array of SimpleForm objects
 */
function setForms($forms) {
    $this->forms = $forms;
}
/**
 * Test for the presence of a frameset, i.e. whether at least
 * one frame is stored on the page.
 *
 * @return boolean        True if frameset.
 */
function hasFrames() {
    $frame_count = count($this->frames);
    return $frame_count > 0;
}
302 * Accessor for frame name and source URL for every frame that
303 * will need to be loaded. Immediate children only.
304 * @return boolean/array False if no frameset or
305 * otherwise a hash of frame URLs.
306 * The key is either a numerical
307 * base one index or the name attribute.
310 function getFrameset() {
311 if (! $this->hasFrames()) {
315 for ($i = 0; $i < count($this->frames
); $i++
) {
316 $name = $this->frames
[$i]->getAttribute('name');
317 $url = new SimpleUrl($this->frames
[$i]->getAttribute('src'));
318 $urls[$name ?
$name : $i +
1] = $this->expandUrl($url);
/**
 * Fetches a list of loaded frames. Here that is the page's
 * own URL, expanded and rendered as a string.
 *
 * @return array/string    Just the URL for a single page.
 */
function getFrames() {
    return $this->expandUrl($this->getUrl())->asString();
}
334 * Accessor for a list of all links.
335 * @return array List of urls with scheme of
336 * http or https and hostname.
341 foreach ($this->links
as $link) {
342 $url = $this->getUrlFromLink($link);
343 $all[] = $url->asString();
349 * Accessor for URLs by the link label. Label will match
350 * regardless of whitespace issues and case.
351 * @param string $label Text of link.
352 * @return array List of links with that label.
355 function getUrlsByLabel($label) {
357 foreach ($this->links
as $link) {
358 if ($link->getText() == $label) {
359 $matches[] = $this->getUrlFromLink($link);
366 * Accessor for a URL by the id attribute.
367 * @param string $id Id attribute of link.
368 * @return SimpleUrl URL with that id or false if none.
371 function getUrlById($id) {
372 foreach ($this->links
as $link) {
373 if ($link->getAttribute('id') === (string)$id) {
374 return $this->getUrlFromLink($link);
381 * Converts a link tag into a target URL.
382 * @param SimpleAnchor $link Parsed link.
383 * @return SimpleUrl URL with frame target if any.
386 protected function getUrlFromLink($link) {
387 $url = $this->expandUrl($link->getHref());
388 if ($link->getAttribute('target')) {
389 $url->setTarget($link->getAttribute('target'));
/**
 * Expands a relative URL into a fully qualified one. The URL
 * is resolved against the base URL when one is set (an empty
 * SimpleUrl otherwise), which is itself first resolved against
 * the page URL.
 *
 * @param SimpleUrl $url        Relative URL. Anything that is
 *                              not an object is wrapped in a
 *                              SimpleUrl first.
 * @return SimpleUrl            Absolute URL.
 */
function expandUrl($url) {
    if (! is_object($url)) {
        $url = new SimpleUrl($url);
    }
    if ($this->getBaseUrl()) {
        $location = $this->getBaseUrl();
    } else {
        $location = new SimpleUrl();
    }
    $anchor = $location->makeAbsolute($this->getUrl());
    return $url->makeAbsolute($anchor);
}
/**
 * Sets the base url for the page. The value is stored
 * wrapped in a SimpleUrl.
 *
 * @param string $url      Base URL for page.
 */
function setBase($url) {
    $base = new SimpleUrl($url);
    $this->base = $base;
}
418 * Sets the title tag contents.
419 * @param SimpleTitleTag $tag Title of page.
421 function setTitle($tag) {
426 * Accessor for parsed title.
427 * @return string Title or false if no title is present.
430 function getTitle() {
432 return $this->title
->getText();
438 * Finds a held form by button label. Will only
439 * search correctly built forms.
440 * @param SimpleSelector $selector Button finder.
441 * @return SimpleForm Form object containing
445 function getFormBySubmit($selector) {
446 for ($i = 0; $i < count($this->forms
); $i++
) {
447 if ($this->forms
[$i]->hasSubmit($selector)) {
448 return $this->forms
[$i];
455 * Finds a held form by image using a selector.
456 * Will only search correctly built forms.
457 * @param SimpleSelector $selector Image finder.
458 * @return SimpleForm Form object containing
462 function getFormByImage($selector) {
463 for ($i = 0; $i < count($this->forms
); $i++
) {
464 if ($this->forms
[$i]->hasImage($selector)) {
465 return $this->forms
[$i];
472 * Finds a held form by the form ID. A way of
473 * identifying a specific form when we have control
475 * @param string $id Form label.
476 * @return SimpleForm Form object containing the matching ID.
479 function getFormById($id) {
480 for ($i = 0; $i < count($this->forms
); $i++
) {
481 if ($this->forms
[$i]->getId() == $id) {
482 return $this->forms
[$i];
489 * Sets a field on each form in which the field is
491 * @param SimpleSelector $selector Field finder.
492 * @param string $value Value to set field to.
493 * @return boolean True if value is valid.
496 function setField($selector, $value, $position=false) {
498 for ($i = 0; $i < count($this->forms
); $i++
) {
499 if ($this->forms
[$i]->setField($selector, $value, $position)) {
507 * Accessor for a form element value within a page.
508 * @param SimpleSelector $selector Field finder.
509 * @return string/boolean A string if the field is
510 * present, false if unchecked
511 * and null if missing.
514 function getField($selector) {
515 for ($i = 0; $i < count($this->forms
); $i++
) {
516 $value = $this->forms
[$i]->getValue($selector);
/**
 * Turns HTML into text browser visible text. Comments and the
 * contents of script, option and textarea elements are removed,
 * images are converted to their alt text, all remaining tags
 * are suppressed, entities are converted to their visible
 * representation and runs of whitespace collapse to one space.
 *
 * @param string $html        HTML to convert.
 * @return string             Plain text.
 */
static function normalise($html) {
    $text = preg_replace('#<!--.*?-->#si', '', $html);
    $text = preg_replace('#<(script|option|textarea)[^>]*>.*?</\1>#si', '', $text);
    // Tag and attribute names are case-insensitive in HTML, so match
    // <IMG ALT="..."> as well as <img alt="...">, like the patterns above.
    $text = preg_replace('#<img[^>]*alt\s*=\s*("([^"]*)"|\'([^\']*)\'|([a-zA-Z_]+))[^>]*>#i', ' \2\3\4 ', $text);
    $text = preg_replace('#<[^>]*>#', '', $text);
    $text = html_entity_decode($text, ENT_QUOTES);
    $text = preg_replace('#\s+#', ' ', $text);
    // Strip ordinary whitespace first, then any \xA0 bytes left at either
    // end; \xA0 is the single-byte (Latin-1) form of a decoded &nbsp;.
    // NOTE(review): this trim is byte-wise. If entities are decoded as
    // UTF-8 a non-breaking space is \xC2\xA0 -- confirm the intended charset.
    // TODO: Add a test for this.
    return trim(trim($text), "\xA0");
}