4 * Copyright 2004-2006, Steve Blinch
5 * http://code.blitzaffe.com
6 * ============================================================================
10 * Provides a pure-PHP implementation of an HTTP v1.1 client, including support
11 * for chunked transfer encoding and user agent spoofing. Both GET and POST
12 * requests are supported.
14 * This can be used in place of something like CURL or WGET for HTTP requests.
15 * Native SSL (HTTPS) requests are also supported if the OpenSSL extension is
16 * installed under PHP v4.3.0 or greater.
18 * If native SSL support is not available, the class will also check for the
19 * CURL extension; if it's installed, it will transparently be used for SSL
22 * If neither native SSL support nor the CURL extension are available, and
23 * libcurlemu (a CURL emulation library available from our web site) is found,
24 * the class will also check for the CURL console binary (usually in
25 * /usr/bin/curl); if it's installed, it will transparently be used for SSL
28 * In short, if it's possible to make an HTTP/HTTPS request from your server,
29 * this class can most likely do it.
35 * - Added set_transfer_display() and default_transfer_callback()
36 * methods for transfer progress tracking
37 * - Suppressed possible "fatal protocol error" when remote SSL server
38 * closes the connection early
39 * - Added get_content_type() method
40 * - make_query_string() now handles arrays
43 * - Added set_progress_display() and default_progress_callback()
44 * methods for debug output
45 * - Added support for relative URLs in HTTP redirects
46 * - Added cookie support (sending and receiving)
47 * - Numerous bug fixes
50 * - Added support for automatically following HTTP redirects
51 * - Added ::get_error() method to get any available error message (be
52 * it an HTTP result error or an internal/connection error)
53 * - Added ::cache_hit variable to determine whether the page was cached
56 * - Added stream_timeout class variable.
57 * - Added progress_callback class variable.
58 * - Added support for braindead servers that ignore Connection: close
63 * // HTTPRetriever usage example
64 * require_once("class_HTTPRetriever.php");
65 * $http = new HTTPRetriever();
68 * // Example GET request:
69 * // ----------------------------------------------------------------------------
70 * $keyword = "blitzaffe code"; // search Google for this keyword
71 * if (!$http->get("http://www.google.com/search?hl=en&q=%22".urlencode($keyword)."%22&btnG=Search&meta=")) {
72 * echo "HTTP request error: #{$http->result_code}: {$http->result_text}";
75 * echo "HTTP response headers:<br><pre>";
76 * var_dump($http->response_headers);
79 * echo "Page content:<br><pre>";
80 * echo $http->response;
82 * // ----------------------------------------------------------------------------
85 * // Example POST request:
86 * // ----------------------------------------------------------------------------
87 * $keyword = "blitzaffe code"; // search Google for this keyword
90 * "q"=>"%22".urlencode($keyword)."%22",
94 * // Note: This example is just to demonstrate the POST equivalent of the GET
95 * // example above; running this script will return a 501 Not Implemented, as
96 * // Google does not support POST requests.
97 * if (!$http->post("http://www.google.com/search",$http->make_query_string($values))) {
98 * echo "HTTP request error: #{$http->result_code}: {$http->result_text}";
101 * echo "HTTP response headers:<br><pre>";
102 * var_dump($http->response_headers);
105 * echo "Page content:<br><pre>";
106 * echo $http->response;
108 * // ----------------------------------------------------------------------------
113 * This script is free software; you can redistribute it and/or modify it under the
114 * terms of the GNU General Public License as published by the Free Software
115 * Foundation; either version 2 of the License, or (at your option) any later
118 * This script is distributed in the hope that it will be useful, but WITHOUT ANY
119 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
120 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
123 * You should have received a copy of the GNU General Public License along
124 * with this script; if not, write to the Free Software Foundation, Inc.,
125 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
// User agent IDs accepted by HTTPRetriever::set_user_agent().
// Each constant is guarded so that including this file twice (or alongside
// code that already declares the same names) does not raise a warning.
if (!defined("UA_EXPLORER")) define("UA_EXPLORER",0);
if (!defined("UA_MOZILLA")) define("UA_MOZILLA",1);
if (!defined("UA_FIREFOX")) define("UA_FIREFOX",2);
if (!defined("UA_OPERA")) define("UA_OPERA",3);

// Progress message severity levels passed to the progress callback.
if (!defined('HRP_DEBUG')) define('HRP_DEBUG',0);
if (!defined('HRP_INFO')) define('HRP_INFO',1);
if (!defined('HRP_ERROR')) define('HRP_ERROR',2);

// Define CURL_PATH before including this file to override the default.
if (!defined("CURL_PATH")) define("CURL_PATH","/usr/bin/curl");

// If the CURL extension is not loaded, but the CURL Emulation Library is found
// next to this file (or in a libcurlemu/ subfolder), load it. Define
// HTTPR_NO_REDECLARE_CURL to skip this lookup entirely.
if (!extension_loaded("curl") && !defined('HTTPR_NO_REDECLARE_CURL') ) {
	foreach (array(dirname(__FILE__)."/",dirname(__FILE__)."/libcurlemu/") as $libcurlemupath) {
		$libcurlemuinc = $libcurlemupath.'libcurlexternal.inc.php';
		if (is_readable($libcurlemuinc)) {
			require_once($libcurlemuinc);
			// stop at the first copy found; loading a second copy from the
			// other folder would redeclare the same functions
			break;
		}
	}
}
150 class HTTPRetriever
{
// Constructor: sets every configuration option to its default value.
//
// PHP 5+ calls __construct(); the PHP 4-style HTTPRetriever() method below is
// kept so that PHP 4 (and any code calling it explicitly) keeps working. On
// PHP 8 a method named after the class is no longer a constructor, so without
// __construct() none of these defaults would ever be applied.
function __construct() {
	// default HTTP headers to send with all requests
	$this->headers = array(
		"User-Agent"=>"HTTPRetriever/1.0",
		"Connection"=>"close"
	);

	// HTTP version (has no effect if using CURL)
	$this->version = "1.1";

	// Normally, CURL is only used for HTTPS requests; setting this to
	// TRUE will force CURL for HTTP requests as well. Not recommended.
	$this->force_curl = false;

	// If you don't want to use CURL at all, set this to TRUE.
	$this->disable_curl = false;

	// If an HTTPS request returns an error message about SSL certificates in
	// $this->error and you don't care about security, set this to TRUE
	$this->insecure_ssl = false;

	// Set the maximum time (in seconds) to wait for a connection
	$this->connect_timeout = 15;

	// Set the maximum time to allow a transfer to run, or 0 to disable.
	// (Read by the request loop, so it must always be initialized.)
	$this->max_time = 0;

	// Set the maximum time for a socket read/write operation, or 0 to disable.
	$this->stream_timeout = 0;

	// If you're making an HTTPS request to a host whose SSL certificate
	// doesn't match its domain name, AND YOU FULLY UNDERSTAND THE
	// SECURITY IMPLICATIONS OF IGNORING THIS PROBLEM, set this to TRUE.
	$this->ignore_ssl_hostname = false;

	// If TRUE, the get() and post() methods will close the connection
	// and return immediately after receiving the HTTP result code
	$this->result_close = false;

	// If set to a positive integer value, retrieved pages will be cached
	// for this number of seconds. Any subsequent calls within the cache
	// period will return the cached page, without contacting the remote
	// server.
	$this->caching = false;

	// If $this->caching is enabled, this specifies the folder under which
	// cached pages are saved.
	$this->cache_path = '/tmp/';

	// Set these to perform basic HTTP authentication
	$this->auth_username = '';
	$this->auth_password = '';

	// Optionally set this to a valid callback to have HTTPRetriever
	// provide progress messages. Your callback must accept 2 parameters:
	// an integer representing the severity (0=debug, 1=information, 2=error),
	// and a string representing the progress message
	$this->progress_callback = null;

	// Optionally set this to a valid callback to have HTTPRetriever
	// provide bytes-transferred messages. Your callback must accept 2
	// parameters: an integer representing the number of bytes transferred,
	// and an integer representing the total number of bytes expected (the
	// request code initializes the expected count to -1 before it is known).
	$this->transfer_callback = null;

	// Set this to TRUE if you want HTTPRetriever to transparently follow HTTP
	// redirects (code 301, 302, 303, and 307). Optionally set this to a
	// numeric value to limit the maximum number of redirects to the specified
	// value. (Redirection loops are detected automatically.)
	// Note that non-GET/HEAD requests will NOT be redirected except on code
	// 303, as per HTTP standards.
	$this->follow_redirects = false;

	// No error has occurred yet; get_error() reads this before any request.
	$this->error = false;
}

// PHP 4-style constructor, retained for backward compatibility.
function HTTPRetriever() {
	$this->__construct();
}
// Send an HTTP GET request to $url. If $ipaddress is specified, the
// connection is made to that IP address instead of the host named in $url.
//
// If $cookies is set, it should be an array in one of two formats.
//
// Either keyed by cookie name and then by path:
//		$cookies[ 'cookiename' ][ '/path/' ] = array(
//			'expires'=>time(),
//			'domain'=>'yourdomain.com',
//			'value'=>'cookievalue'
//		);
//
// Or, a more simplified format:
//		$cookies[ 'cookiename' ] = 'value';
//
// The former format will automatically check to make sure that the path, domain,
// and expiration values match the HTTP request, and will only send the cookie if
// they do match. The latter will force the cookie to be set for the HTTP request
// unconditionally.
//
// Returns whatever _execute_request() returns for the request.
function get($url,$ipaddress = false,$cookies = false) {
	$this->method = "GET";
	$this->post_data = "";
	$this->connect_ip = $ipaddress;
	return $this->_execute_request($url,$cookies);
}
// Send an HTTP POST request to $url with the request body $data (for form
// posts, a string such as the one built by make_query_string()). See ::get()
// for a description of $ipaddress and $cookies.
//
// Returns whatever _execute_request() returns for the request.
function post($url,$data="",$ipaddress = false,$cookies = false) {
	$this->method = "POST";
	$this->post_data = $data;
	$this->connect_ip = $ipaddress;
	return $this->_execute_request($url,$cookies);
}
// Send an HTTP HEAD request to $url. See ::get() for a description of the
// arguments. No request body is sent.
//
// Returns whatever _execute_request() returns for the request.
function head($url,$ipaddress = false,$cookies = false) {
	$this->method = "HEAD";
	$this->post_data = "";
	$this->connect_ip = $ipaddress;
	return $this->_execute_request($url,$cookies);
}
// Send an alternate (non-GET/POST) HTTP request to $url using the verb given
// in $method, with $data as the request body. See ::get() for a description
// of $ipaddress and $cookies.
//
// Returns whatever _execute_request() returns for the request.
function custom($method,$url,$data="",$ipaddress = false,$cookies = false) {
	$this->method = $method;
	$this->post_data = $data;
	$this->connect_ip = $ipaddress;
	return $this->_execute_request($url,$cookies);
}
// Flattens the (possibly nested) array $arraycontents into query-string
// pairs of the form name[key]=value&, recursing for nested arrays so that
// deeper levels become name[key][subkey]=value&.
//
// $arrayname must already be URL-encoded by the caller; keys and values are
// URL-encoded here. The returned string keeps its trailing "&" so results
// can be concatenated; an empty array yields an empty string.
function array_to_query($arrayname,$arraycontents) {
	$output = "";
	foreach ($arraycontents as $key=>$value) {
		$encodedkey = urlencode((string) $key);
		if (is_array($value)) {
			$output .= $this->array_to_query(sprintf('%s[%s]',$arrayname,$encodedkey),$value);
		} else {
			// cast so that NULL/boolean values do not reach urlencode() untyped
			$output .= sprintf('%s[%s]=%s&',$arrayname,$encodedkey,urlencode((string) $value));
		}
	}
	return $output;
}
// Builds a URL-encoded query string from the associative array $data;
// returns a string that can be passed to $this->post().
//
// Array values are expanded via array_to_query(), scalar values are encoded
// directly, and any other value (object, NULL) is serialize()d first.
// Always returns a string: an empty array or a non-array $data yields "".
function make_query_string($data) {
	$output = "";
	if (is_array($data)) {
		foreach ($data as $name=>$value) {
			$encodedname = urlencode((string) $name);
			if (is_array($value)) {
				$output .= $this->array_to_query($encodedname,$value);
			} elseif (is_scalar($value)) {
				$output .= $encodedname."=".urlencode((string) $value)."&";
			} else {
				$output .= $encodedname."=".urlencode(serialize($value)).'&';
			}
		}
	}
	// Nothing was appended: return "" explicitly, because substr("",0,-1)
	// yields FALSE rather than "" on PHP versions before 8.
	if ($output === "") return "";

	// drop the trailing "&" left by the last pair
	return substr($output,0,-1);
}
// Sets the User-Agent request header to a spoofed browser string.
//
// $agenttype is one of the UA_* constants (used as an index into the
// template list below), $agentversion replaces %agent% and $windowsversion
// replaces %os% (the Windows NT version number).
//
// This is pretty limited... but really, if you're going to spoof your UA,
// you'll probably want to use a Windows OS for the spoof anyway.
//
// If you want to set the user agent to a custom string, just assign your
// string to $this->headers["User-Agent"] directly.
//
// Returns TRUE if the header was set, or FALSE (leaving the current header
// untouched) if $agenttype is not a known type.
function set_user_agent($agenttype,$agentversion,$windowsversion) {
	$useragents = array(
		"Mozilla/4.0 (compatible; MSIE %agent%; Windows NT %os%)", // IE
		"Mozilla/5.0 (Windows; U; Windows NT %os%; en-US; rv:%agent%) Gecko/20040514", // Moz
		"Mozilla/5.0 (Windows; U; Windows NT %os%; en-US; rv:1.7) Gecko/20040803 Firefox/%agent%", // FFox
		"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT %os%) Opera %agent% [en]", // Opera
	);
	if (!isset($useragents[$agenttype])) return false;

	$agent = $useragents[$agenttype];
	$this->headers["User-Agent"] = str_replace(array("%agent%","%os%"),array($agentversion,$windowsversion),$agent);
	return true;
}
// Decodes a chunked transfer-encoded body held in $this->response, replacing
// it with the concatenated chunk data.
//
// This isn't presently used as it's now handled inline by the request parser.
//
// Each chunk is "<hex size>[;extensions]\r\n<data>\r\n". Decoding stops at
// the first zero-length chunk. If the data does not look like a valid chunk
// (no CR found, or a size line that is not hexadecimal), whatever is left is
// appended unchanged so that no received data is silently discarded.
function remove_chunkiness() {
	$remaining = (string) $this->response;
	$this->response = "";

	while (strlen($remaining)) {
		$hexlen = strpos($remaining,"\r");
		if ($hexlen===false) {
			// no chunk header terminator: not chunked data, keep it as-is
			$this->response .= $remaining;
			break;
		}

		$chunksize = substr($remaining,0,$hexlen);
		// strip chunk extensions ("size;name=value")
		$argstart = strpos($chunksize,';');
		if ($argstart!==false) $chunksize = substr($chunksize,0,$argstart);
		$chunksize = trim((string) $chunksize);

		if (!strlen($chunksize) || !ctype_xdigit($chunksize)) {
			// something's borked... keep the rest verbatim and exit
			$this->response .= $remaining;
			break;
		}
		$chunksize = (int) hexdec($chunksize);

		// skip the size line and its CRLF, then take the chunk data
		$this->response .= (string) substr($remaining,$hexlen+2,$chunksize);
		// skip the chunk data and the CRLF that follows it
		$remaining = (string) substr($remaining,$hexlen+2+$chunksize+2);

		if (!$chunksize) {
			// terminating zero-length chunk: keep anything that followed it
			$this->response .= $remaining;
			break;
		}
	}
}
// (internal) Store the current response in the cache.
//
// Serializes the response-related properties to the file
// <cache_path><token>.tmp. Returns TRUE when the file was written completely,
// FALSE otherwise; failures are reported through progress() at debug level.
function _cache_store($token) {
	$values = array(
		"stats"=>$this->stats,
		"result_code"=>$this->result_code,
		"result_text"=>$this->result_text,
		"version"=>$this->version,
		"response"=>$this->response,
		"response_headers"=>$this->response_headers,
		"response_cookies"=>$this->response_cookies,
		"raw_response"=>$this->raw_response,
	);
	$values = serialize($values);

	$filename = $this->cache_path.$token.'.tmp';

	$fp = @fopen($filename,"w");
	if (!$fp) {
		$this->progress(HRP_DEBUG,"Unable to create cache file");
		return false;
	}

	// hold an exclusive lock while writing so that a concurrent
	// _cache_fetch() in another process does not read a half-written file
	@flock($fp,LOCK_EX);
	$written = fwrite($fp,$values);
	@flock($fp,LOCK_UN);
	fclose($fp);

	if ($written!==strlen($values)) {
		// a truncated entry would fail to unserialize later; remove it
		@unlink($filename);
		$this->progress(HRP_DEBUG,"Unable to create cache file");
		return false;
	}

	$this->progress(HRP_DEBUG,"HTTP response stored to cache");
	return true;
}
// (internal) Fetch a page from the cache.
//
// Looks for <cache_path><token>.tmp; if it exists, is no older than
// $this->caching seconds and holds a valid entry, the response-related
// properties are restored from it, $this->cache_hit is set to TRUE and TRUE
// is returned. In every other case FALSE is returned and cache_hit is FALSE.
function _cache_fetch($token) {
	$this->cache_hit = false;
	$this->progress(HRP_DEBUG,"Checking for cached page value");

	$filename = $this->cache_path.$token.'.tmp';
	if (!file_exists($filename)) {
		$this->progress(HRP_DEBUG,"Page not available in cache");
		return false;
	}

	if (time()-@filemtime($filename)>$this->caching) {
		$this->progress(HRP_DEBUG,"Page in cache is expired");
		return false;
	}

	$values = @file_get_contents($filename);
	if (!$values) {
		$this->progress(HRP_DEBUG,"Error reading cache file");
		return false;
	}

	// SECURITY: the cache file lives in a directory other local users may be
	// able to write to (default /tmp/). Never let unserialize() instantiate
	// objects from it; the allowed_classes option exists from PHP 7.0 on.
	if (version_compare(PHP_VERSION,'7.0.0','>=')) {
		$values = @unserialize($values,array('allowed_classes'=>false));
	} else {
		$values = @unserialize($values);
	}

	// every key written by _cache_store() must be present
	$required = array(
		"stats","result_code","result_text","version",
		"response","response_headers","response_cookies","raw_response"
	);
	$valid = is_array($values);
	if ($valid) {
		foreach ($required as $key) {
			if (!array_key_exists($key,$values)) {
				$valid = false;
				break;
			}
		}
	}
	if (!$valid) {
		$this->progress(HRP_DEBUG,"Invalid cache contents");
		return false;
	}

	$this->stats = $values["stats"];
	$this->result_code = $values["result_code"];
	$this->result_text = $values["result_text"];
	$this->version = $values["version"];
	$this->response = $values["response"];
	$this->response_headers = $values["response_headers"];
	$this->response_cookies = $values["response_cookies"];
	$this->raw_response = $values["raw_response"];

	$this->progress(HRP_DEBUG,"Page loaded from cache");
	$this->cache_hit = true;
	return true;
}
// Returns the parent folder of the URL path $path, with a leading slash and
// without a trailing one; e.g. "/a/b/c" and "/a/b/c/" both give "/a/b".
// Returns an empty string when $path has no parent ("/a", "a", "/" or "").
function parent_path($path) {
	$path = (string) $path;
	// strip one leading and one trailing slash; the casts cover PHP versions
	// where substr() returns FALSE instead of "" for an empty result
	if (substr($path,0,1)=='/') $path = (string) substr($path,1);
	if (substr($path,-1)=='/') $path = (string) substr($path,0,-1);

	$segments = explode('/',$path);
	// drop the last segment to move up one level
	array_pop($segments);

	return count($segments) ? ('/' . implode('/',$segments)) : '';
}
// Copies the cookies in $cookies that apply to the request described by
// $urlinfo (a parse_url() array with at least 'host' and 'path') into
// $this->request_cookies.
//
// $cookies should be an array in one of two formats.
//
// Either keyed by cookie name and then by path:
//		$cookies[ 'cookiename' ][ '/path/' ] = array(
//			'expires'=>time(),
//			'domain'=>'yourdomain.com',
//			'value'=>'cookievalue'
//		);
//
// Or, a more simplified format:
//		$cookies[ 'cookiename' ] = 'value';
//
// The former format will automatically check to make sure that the path, domain,
// and expiration values match the HTTP request, and will only send the cookie if
// they do match. The latter will force the cookie to be set for the HTTP request
// unconditionally.
function response_to_request_cookies($cookies,$urlinfo) {
	if (!is_array($cookies)) return;

	// check for simplified cookie format (name=value); the last entry is
	// taken as representative of the whole array
	$cookiekeys = array_keys($cookies);
	if (!count($cookiekeys)) return;

	$testkey = array_pop($cookiekeys);
	if (!is_array($cookies[ $testkey ])) {
		foreach ($cookies as $k=>$v) $this->request_cookies[$k] = $v;
		return;
	}

	$requesthost = isset($urlinfo['host']) ? strtolower($urlinfo['host']) : '';
	$requestpath = isset($urlinfo['path']) ? $urlinfo['path'] : '/';
	if (substr($requestpath,-1)!='/') $requestpath .= '/';

	// must not be simplified format, so parse as complex format:
	foreach ($cookies as $name=>$paths) {
		if (!is_array($paths)) continue;
		foreach ($paths as $path=>$values) {
			if (!is_array($values) || !isset($values['value'])) continue;

			// make sure the cookie isn't expired
			if ( isset($values['expires']) && ($values['expires']<time()) ) continue;

			// make sure the cookie is valid for this host; hostnames are
			// case-insensitive, and a leading dot on the cookie domain
			// (".example.com") must not prevent the match
			$cookiehost = isset($values['domain']) ? ltrim(strtolower($values['domain']),'.') : '';
			if (!strlen($cookiehost)) continue;
			$domain_match = (
				($requesthost==$cookiehost) ||
				(substr($requesthost,-(strlen($cookiehost)+1))=='.'.$cookiehost)
			);
			if (!$domain_match) continue;

			// make sure the cookie is valid for this path
			$cookiepath = $path;
			if (substr($cookiepath,-1)!='/') $cookiepath .= '/';
			if (substr($requestpath,0,strlen($cookiepath))!=$cookiepath) continue;

			$this->request_cookies[$name] = $values['value'];
		}
	}
}
// Execute the request for a particular URL, and transparently follow
// HTTP redirects if enabled. If $cookies is specified, it is assumed
// to be an array received from $this->response_cookies and will be
// processed to determine which cookies are valid for this host/URL.
//
// Returns the result of the last _send_request() call, or FALSE when a
// redirection loop is detected or the redirection limit is exceeded (in
// which case $this->error describes the problem). After the call,
// $this->followed_redirect is FALSE, or the list of URLs redirected to.
function _execute_request($url,$cookies = false) {
	// valid codes for which we transparently follow a redirect
	$redirect_codes = array(301,302,303,307);
	// valid methods for which we transparently follow a redirect
	$redirect_methods = array('GET','HEAD');

	$request_result = false;

	// do not let state from a previous request leak into this one
	$this->error = false;
	$this->followed_redirect = false;
	$this->response_cookies = array();

	$previous_redirects = array();
	do {
		// a stale Location header or result code from the previous hop must
		// not trigger another redirect if this request fails early
		$this->response_headers = array();
		$this->result_code = 0;
		$this->result_text = '';

		$request_result = $this->_send_request($url,$cookies);
		$lasturl = $url;
		$url = false;

		// see if a redirect code was received (loose comparison on purpose:
		// the result code may be a numeric string)
		if (
			$this->follow_redirects &&
			in_array($this->result_code,$redirect_codes) &&
			!empty($this->response_headers['Location'])
		) {
			// only redirect on a code 303 or if the method was GET/HEAD
			if ( ($this->result_code==303) || in_array($this->method,$redirect_methods) ) {

				// a 303 response to a non-GET/HEAD request is followed
				// with a GET request and no request body
				if ( ($this->result_code==303) && !in_array($this->method,$redirect_methods) ) {
					$this->method = 'GET';
					$this->post_data = '';
				}

				// parse the information from the OLD URL so that we can handle
				// relative redirects
				$oldurlinfo = parse_url($lasturl);
				if (!is_array($oldurlinfo)) $oldurlinfo = array();

				// parse the information in the new URL, and fill in any blanks
				// using values from the old URL
				$urlinfo = parse_url($this->response_headers['Location']);
				if (!is_array($urlinfo)) $urlinfo = array();

				$same_host = empty($urlinfo['host']);
				$has_path = !empty($urlinfo['path']);
				foreach ($oldurlinfo as $k=>$v) {
					if (!empty($urlinfo[$k])) continue;
					// a redirect naming its own host only inherits the scheme;
					// the old port and credentials belong to the old host
					if (!$same_host && ($k!='scheme')) continue;
					// the old query/fragment only apply when the redirect
					// does not name a different path
					if ($has_path && (($k=='query') || ($k=='fragment'))) continue;
					$urlinfo[$k] = $v;
				}
				if (empty($urlinfo['path'])) $urlinfo['path'] = '/';

				// create an absolute path, resolving a relative one against
				// the folder of the OLD URL's path
				if (substr($urlinfo['path'],0,1)!='/') {
					$baseurl = !empty($oldurlinfo['path']) ? $oldurlinfo['path'] : '/';
					if (substr($baseurl,-1)!='/') $baseurl = $this->parent_path($baseurl) . '/';
					$urlinfo['path'] = $baseurl . $urlinfo['path'];
				}

				$url = $this->rebuild_url($urlinfo);

				$this->progress(HRP_INFO,'Redirected to '.$url);
			}
		}

		if ( $url && strlen($url) ) {

			if (isset($previous_redirects[$url])) {
				$this->error = "Infinite redirection loop";
				$request_result = false;
				break;
			}
			// $previous_redirects holds the redirects already followed, so
			// following one more is only allowed while below the limit
			if ( is_numeric($this->follow_redirects) && (count($previous_redirects)>=$this->follow_redirects) ) {
				$this->error = "Exceeded redirection limit";
				$request_result = false;
				break;
			}

			$previous_redirects[$url] = true;
		}

	} while ($url && strlen($url));

	// clear headers that shouldn't persist across multiple requests
	$per_request_headers = array('Host','Content-Length');
	foreach ($per_request_headers as $v) unset($this->headers[$v]);

	if (count($previous_redirects)>0) $this->followed_redirect = array_keys($previous_redirects);

	return $request_result;
}
// Sends one HTTP request for $url using the method, POST data, headers and
// connection settings currently stored on the object, and parses the reply.
//
// Visible flow: optional cache lookup; build the request line and headers;
// then either talk to the server over a socket opened with fsockopen()
// (plain HTTP, or HTTPS when the OpenSSL extension is loaded) or hand the URL
// to _curl_request(); finally split the raw response into
// $this->response_headers / $this->response, extract the status line into
// $this->result_code / $this->result_text, fill $this->stats and store the
// page in the cache when the result code is 200 and caching is enabled.
//
// NOTE(review): a number of statements of this method (the read-loop header,
// several closing braces, else branches and return statements) are not
// present in this copy of the file — confirm against the upstream source
// before changing any logic here.
566 // private - sends an HTTP request to $url
567 function _send_request($url,$cookies = false) {
568 $this->progress(HRP_INFO
,"Initiating {$this->method} request for $url");
// NOTE(review): the cache token covers the URL and POST data but not the
// request method, so e.g. a HEAD and a GET of the same URL share one entry.
569 if ($this->caching
) {
570 $cachetoken = md5($url.'|'.$this->post_data
);
571 if ($this->_cache_fetch($cachetoken)) return true;
574 $time_request_start = $this->getmicrotime();
576 $urldata = parse_url($url);
577 $http_host = $urldata['host'] . (isset($urldata['port']) ?
':'.$urldata['port'] : '');
// default the port from the scheme (443 for https, otherwise 80) and the path to "/"
579 if (!isset($urldata["port"]) ||
!$urldata["port"]) $urldata["port"] = ($urldata["scheme"]=="https") ?
443 : 80;
580 if (!isset($urldata["path"]) ||
!$urldata["path"]) $urldata["path"] = '/';
// credentials embedded in the URL override auth_username/auth_password
582 if (!empty($urldata['user'])) $this->auth_username
= $urldata['user'];
583 if (!empty($urldata['pass'])) $this->auth_password
= $urldata['pass'];
585 //echo "Sending HTTP/{$this->version} {$this->method} request for ".$urldata["host"].":".$urldata["port"]." page ".$urldata["path"]."<br>";
// NOTE(review): the version check below is a string comparison.
587 if ($this->version
>"1.0") $this->headers
["Host"] = $http_host;
// NOTE(review): Content-Length/Content-Type are only set for POST; a custom()
// request that carries a body gets neither header here.
588 if ($this->method
=="POST") {
589 $this->headers
["Content-Length"] = strlen($this->post_data
);
590 if (!isset($this->headers
["Content-Type"])) $this->headers
["Content-Type"] = "application/x-www-form-urlencoded";
593 if ( !empty($this->auth_username
) ||
!empty($this->auth_password
) ) {
594 $this->headers
['Authorization'] = 'Basic '.base64_encode($this->auth_username
.':'.$this->auth_password
);
596 unset($this->headers
['Authorization']);
599 if (is_array($cookies)) {
600 $this->response_to_request_cookies($cookies,$urldata);
// the query string is appended to the request path for GET requests only
603 if (($this->method
=="GET") && (!empty($urldata["query"]))) $urldata["path"] .= "?".$urldata["query"];
604 $request = $this->method
." ".$urldata["path"]." HTTP/".$this->version
."\r\n";
605 $request .= $this->build_headers();
606 $request .= $this->post_data
;
608 $this->response
= "";
610 // Native SSL support requires the OpenSSL extension, and was introduced in PHP 4.3.0
611 $php_ssl_support = extension_loaded("openssl") && version_compare(phpversion(),"4.3.0")>=0;
613 // if this is a plain HTTP request, or if it's an HTTPS request and OpenSSL support is available,
614 // natively perform the HTTP request
615 if ( ( ($urldata["scheme"]=="http") ||
($php_ssl_support && ($urldata["scheme"]=="https")) ) && (!$this->force_curl
) ) {
// connect to the explicit IP address when one was given, else to the URL's host
618 $hostname = $this->connect_ip ?
$this->connect_ip
: $urldata['host'];
619 if ($urldata["scheme"]=="https") $hostname = 'ssl://'.$hostname;
621 $time_connect_start = $this->getmicrotime();
623 $this->progress(HRP_INFO
,'Opening socket connection to '.$hostname.' port '.$urldata['port']);
// -1 means the total size is not known yet
625 $this->expected_bytes
= -1;
626 $this->received_bytes
= 0;
628 $fp = @fsockopen
($hostname,$urldata["port"],$errno,$errstr,$this->connect_timeout
);
629 $time_connected = $this->getmicrotime();
630 $connect_time = $time_connected - $time_connect_start;
632 if ($this->stream_timeout
) stream_set_timeout($fp,$this->stream_timeout
);
633 $this->progress(HRP_INFO
,"Connected; sending request");
635 $this->progress(HRP_DEBUG
,$request);
636 fputs ($fp, $request);
637 $this->raw_request
= $request;
639 if ($this->stream_timeout
) {
640 $meta = socket_get_status($fp);
641 if ($meta['timed_out']) {
642 $this->error
= "Exceeded socket write timeout of ".$this->stream_timeout
." seconds";
643 $this->progress(HRP_ERROR
,$this->error
);
648 $this->progress(HRP_INFO
,"Request sent; awaiting reply");
650 $headers_received = false;
651 $data_length = false;
// while a known number of body/chunk bytes is outstanding, read exactly that
// much; otherwise read line by line
656 if ($data_length>0) {
657 $line = fread($fp,$data_length);
658 $data_length -= strlen($line);
660 $line = @fgets
($fp,10240);
663 if (!strlen($line)) continue;
// chunk size line: hexadecimal length, optionally followed by ";extensions"
665 list($data_length,) = explode(';',$line);
666 $data_length = (int) hexdec(trim($data_length));
668 if ($data_length==0) {
669 $this->progress(HRP_DEBUG
,"Done");
670 // end of chunked data
673 $this->progress(HRP_DEBUG
,"Chunk length $data_length (0x$line)");
678 $this->response
.= $line;
681 if ($headers_received) {
682 if ($time_connected>0) {
683 $time_firstdata = $this->getmicrotime();
684 $process_time = $time_firstdata - $time_connected;
687 $this->received_bytes +
= strlen($line);
// report transfer progress on every 20th iteration
688 if ($iterations %
20 == 0) {
689 $this->update_transfer_counters();
694 // some dumbass webservers don't respect Connection: close and just
695 // leave the connection open, so we have to be diligent about
696 // calculating the content length so we can disconnect at the end of
// a blank line marks the end of the response headers
698 if ( (!$headers_received) && (trim($line)=="") ) {
699 $headers_received = true;
701 if (preg_match('/^Content-Length: ([0-9]+)/im',$this->response
,$matches)) {
702 $data_length = (int) $matches[1];
703 $this->progress(HRP_DEBUG
,"Content length is $data_length");
704 $this->expected_bytes
= $data_length;
705 $this->update_transfer_counters();
707 if (preg_match("/^Transfer-Encoding: chunked/im",$this->response
,$matches)) {
709 $this->progress(HRP_DEBUG
,"Chunked transfer encoding requested");
// collect Set-Cookie headers into $this->response_cookies[name][path]
712 if (preg_match_all("/^Set-Cookie: ((.*?)\=(.*?)(?:;\s*(.*))?)$/im",$this->response
,$cookielist,PREG_SET_ORDER
)) {
713 // get the path for which cookies will be valid if no path is specified
714 $cookiepath = preg_replace('/\/{2,}/','',$urldata['path']);
715 if (substr($cookiepath,-1)!='/') {
716 $cookiepath = explode('/',$cookiepath);
717 array_pop($cookiepath);
718 $cookiepath = implode('/',$cookiepath) . '/';
720 // process each cookie
721 foreach ($cookielist as $k=>$cookiedata) {
722 list(,$rawcookie,$name,$value,$attributedata) = $cookiedata;
723 $attributedata = explode(';',trim($attributedata));
724 $attributes = array();
728 'raw'=>trim($rawcookie),
// NOTE(review): attribute names are stored exactly as received, so the
// lookups of 'domain', 'path' and 'expires' below are case-sensitive.
730 foreach ($attributedata as $k=>$attribute) {
731 list($attrname,$attrvalue) = explode('=',trim($attribute));
732 $cookie[$attrname] = $attrvalue;
// default the domain to the request host and the path to the request folder
735 if (!isset($cookie['domain']) ||
!$cookie['domain']) $cookie['domain'] = $urldata['host'];
736 if (!isset($cookie['path']) ||
!$cookie['path']) $cookie['path'] = $cookiepath;
737 if (isset($cookie['expires']) && $cookie['expires']) $cookie['expires'] = strtotime($cookie['expires']);
739 if (!$this->validate_response_cookie($cookie,$urldata['host'])) continue;
741 // do not store expired cookies; if one exists, unset it
742 if ( isset($cookie['expires']) && ($cookie['expires']<time()) ) {
743 unset($this->response_cookies
[ $name ][ $cookie['path'] ]);
747 $this->response_cookies
[ $name ][ $cookie['path'] ] = $cookie;
752 //$this->progress(HRP_INFO,"Next [$line]");
753 if ($this->stream_timeout
) {
754 $meta = socket_get_status($fp);
755 if ($meta['timed_out']) {
756 $this->error
= "Exceeded socket read timeout of ".$this->stream_timeout
." seconds";
757 $this->progress(HRP_ERROR
,$this->error
);
762 // check time limits if requested
763 if ($this->max_time
>0) {
764 if ($this->getmicrotime() - $time_request_start > $this->max_time
) {
765 $this->error
= "Exceeded maximum transfer time of ".$this->max_time
." seconds";
766 $this->progress(HRP_ERROR
,$this->error
);
// with result_close enabled, the status line found so far decides the outcome
771 if ($this->result_close
) {
772 if (preg_match_all("/HTTP\/([0-9\.]+) ([0-9]+) (.*?)[\r\n]/",$this->response
,$matches)) {
773 $resultcodes = $matches[2];
774 foreach ($resultcodes as $k=>$code) {
776 $this->progress(HRP_INFO
,'HTTP result code received; closing connection');
778 $this->result_code
= $code;
779 $this->result_text
= $matches[3][$k];
782 return ($this->result_code
==200);
790 $this->update_transfer_counters();
792 if (is_array($this->response_cookies
)) {
793 // make sure paths are sorted in the order in which they should be applied
794 // when setting response cookies
795 foreach ($this->response_cookies
as $name=>$paths) {
796 ksort($this->response_cookies
[$name]);
799 $this->progress(HRP_INFO
,'Request complete');
801 $this->error
= strtoupper($urldata["scheme"])." connection to ".$hostname." port ".$urldata["port"]." failed";
802 $this->progress(HRP_ERROR
,$this->error
);
806 // perform an HTTP/HTTPS request using CURL
807 } elseif ( !$this->disable_curl
&& ( ($urldata["scheme"]=="https") ||
($this->force_curl
) ) ) {
// NOTE(review): the message below is single-quoted, so "$url" is logged
// literally instead of being replaced by the URL.
808 $this->progress(HRP_INFO
,'Passing HTTP request for $url to CURL');
810 if (!$this->_curl_request($url)) return false;
814 $this->error
= "Unsupported protocol: ".$urldata["scheme"];
815 $this->progress(HRP_ERROR
,$this->error
);
// keep the unparsed response, then split headers from body
819 $this->raw_response
= $this->response
;
821 $totallength = strlen($this->response
);
824 $headerlength = strpos($this->response
,"\r\n\r\n");
826 $response_headers = explode("\r\n",substr($this->response
,0,$headerlength));
827 $http_status = trim(array_shift($response_headers));
// NOTE(review): headers are keyed by name exactly as received; a header that
// occurs more than once keeps only its last value.
828 foreach ($response_headers as $line) {
829 list($k,$v) = explode(":",$line,2);
830 $this->response_headers
[trim($k)] = trim($v);
832 $this->response
= substr($this->response
,$headerlength+
4);
834 /* // Handled in-transfer now
835 if (($this->response_headers['Transfer-Encoding']=="chunked") && (!$curl_mode)) {
836 $this->remove_chunkiness();
840 if (!preg_match("/^HTTP\/([0-9\.]+) ([0-9]+) (.*?)$/",$http_status,$matches)) {
841 $matches = array("",$this->version
,0,"HTTP request error");
843 list (,$response_version,$this->result_code
,$this->result_text
) = $matches;
845 // skip HTTP result code 100 (Continue) responses
846 } while (($this->result_code
==100) && ($headerlength));
848 // record some statistics, roughly compatible with CURL's curl_getinfo()
850 $total_time = $this->getmicrotime() - $time_request_start;
851 $transfer_time = $total_time - $connect_time;
852 $this->stats
= array(
853 "total_time"=>$total_time,
854 "connect_time"=>$connect_time, // time between connection request and connection established
855 "process_time"=>$process_time, // time between HTTP request and first data (non-headers) received
857 "content_type"=>$this->response_headers
["Content-Type"],
858 "http_code"=>$this->result_code
,
859 "header_size"=>$headerlength,
860 "request_size"=>$totallength,
861 "filetime"=>strtotime($this->response_headers
["Date"]),
862 "pretransfer_time"=>$connect_time,
863 "size_download"=>$totallength,
864 "speed_download"=>$transfer_time > 0 ?
round($totallength / $transfer_time) : 0,
865 "download_content_length"=>$totallength,
866 "upload_content_length"=>0,
867 "starttransfer_time"=>$connect_time,
// only a 200 response counts as success, and only successful pages are cached
872 $ok = ($this->result_code
==200);
873 if ($ok && $this->caching
) $this->_cache_store($cachetoken);
// Decides whether a cookie received from $actual_hostname may be stored.
//
// $cookie is the parsed cookie array; only its 'domain' entry is examined.
// Returns TRUE when the cookie domain is the requesting host itself, or a
// parent domain of it that contains at least one dot (so a bare TLD such as
// ".com" can never be used). Returns FALSE otherwise.
//
// Hostnames are compared case-insensitively and a leading dot on either
// value is ignored. Without a public-suffix list, multi-label suffixes
// such as "co.uk" cannot be told apart from ordinary domains.
function validate_response_cookie($cookie,$actual_hostname) {
	$cookiehost = isset($cookie['domain']) ? strtolower(trim((string) $cookie['domain'])) : '';
	$cookiehost = ltrim($cookiehost,'.');
	$actual_hostname = ltrim(strtolower(trim((string) $actual_hostname)),'.');

	if (!strlen($cookiehost) || !strlen($actual_hostname)) return false;

	// a host may always set a cookie for exactly itself
	if ($cookiehost===$actual_hostname) return true;

	// make sure the cookie can't be set for a TLD, eg: '.com'
	if (strpos($cookiehost,'.')===false) return false;

	// otherwise the request host must be a subdomain of the cookie domain;
	// the leading dot stops "ample.com" from matching "example.com"
	$suffix = '.'.$cookiehost;
	return (substr($actual_hostname,-strlen($suffix))===$suffix);
}
// Builds the header section of the HTTP request from $this->headers and
// $this->request_cookies.
//
// Returns a string with one "Name: value\r\n" line per non-empty header, a
// single "Cookie:" line when request cookies are set, and the terminating
// blank line ("\r\n").
function build_headers() {
	$headers = "";
	foreach ($this->headers as $name=>$value) {
		// CR/LF inside a name or value would let a caller-supplied string
		// inject additional header lines, so strip them
		$name = str_replace(array("\r","\n"),'',(string) $name);
		$value = trim(str_replace(array("\r","\n"),' ',(string) $value));
		// skip headers without a value; compare against "" rather than
		// using empty() so that a value of "0" (Content-Length: 0) is kept
		if ($value==="") continue;
		$headers .= "{$name}: {$value}\r\n";
	}

	if (isset($this->request_cookies) && is_array($this->request_cookies)) {
		$cookielist = array();
		foreach ($this->request_cookies as $name=>$value) {
			$cookielist[] = str_replace(array("\r","\n"),'',"{$name}={$value}");
		}
		if (count($cookielist)) $headers .= "Cookie: ".implode('; ',$cookielist)."\r\n";
	}

	// blank line separating the headers from the request body
	$headers .= "\r\n";

	return $headers;
}
// Opposite of parse_url(): builds a URL string from the component array
// $urlinfo (keys scheme, user, pass, host, port, path, query, fragment).
// Components that are missing from the array are simply left out.
function rebuild_url($urlinfo) {
	// start from empty defaults so absent keys never raise notices
	$part = array(
		'scheme'=>'','user'=>'','pass'=>'','host'=>'',
		'port'=>'','path'=>'','query'=>'','fragment'=>''
	);
	if (is_array($urlinfo)) {
		foreach ($part as $k=>$v) {
			if (isset($urlinfo[$k])) $part[$k] = (string) $urlinfo[$k];
		}
	}

	$url = $part['scheme'].'://';

	if (strlen($part['user']) || strlen($part['pass'])) {
		$url .= $part['user'];
		// the ":" is required even without a user name, otherwise the
		// password would be read back as the user name
		if (strlen($part['pass'])) $url .= ':'.$part['pass'];
		$url .= '@';
	}

	$url .= $part['host'];
	if ($part['port']) $url .= ':'.$part['port'];

	$url .= $part['path'];

	if (strlen($part['query'])) $url .= '?'.$part['query'];
	if (strlen($part['fragment'])) $url .= '#'.$part['fragment'];

	return $url;
}
// Replaces the host part of $url (modified in place, passed by reference)
// with $new_hostname and returns the host name that was there before, or an
// empty string if $url had no parsable host.
function _replace_hostname(&$url,$new_hostname) {
	$parts = parse_url($url);
	if (!is_array($parts)) $parts = array();
	$old_hostname = isset($parts['host']) ? $parts['host'] : '';

	$parts['host'] = $new_hostname;

	$url = $this->rebuild_url($parts);

	return $old_hostname;
}
// Performs the current request for $url through the CURL functions instead
// of a native socket.
//
// On return $this->response holds the raw response (headers included, as
// CURLOPT_HEADER is enabled) and $this->stats the curl_getinfo() data.
// Returns TRUE on success; on failure returns FALSE and sets $this->error.
function _curl_request($url) {
	$this->error = false;

	if (!function_exists('curl_init')) {
		$this->error = "CURL support is not available";
		return false;
	}

	// if a direct connection IP address was specified, replace the hostname
	// in the URL with the IP address, and set the Host: header to the
	// original hostname
	if ($this->connect_ip) {
		$old_hostname = $this->_replace_hostname($url,$this->connect_ip);
		$this->headers["Host"] = $old_hostname;
	}

	// CURL computes the content length itself
	unset($this->headers["Content-Length"]);

	// build_headers() separates lines with CRLF and ends with a blank line;
	// CURL expects bare header lines, so split on CRLF and drop empties
	$headers = array();
	foreach (explode("\r\n",$this->build_headers()) as $header) {
		if (strlen($header)) $headers[] = $header;
	}

	$ch = curl_init();
	curl_setopt($ch,CURLOPT_URL, $url);
	if (isset($this->headers["User-Agent"])) {
		curl_setopt($ch,CURLOPT_USERAGENT, $this->headers["User-Agent"]);
	}
	curl_setopt($ch,CURLOPT_HEADER, 1);
	curl_setopt($ch,CURLOPT_RETURNTRANSFER, 1);
	// curl_setopt($ch,CURLOPT_FOLLOWLOCATION, 1); // redirects are handled by _execute_request() for consistency with the native method
	// honour max_time when it is set; otherwise keep the historical 10 second limit
	curl_setopt($ch,CURLOPT_TIMEOUT, !empty($this->max_time) ? (int) ceil($this->max_time) : 10);
	curl_setopt($ch,CURLOPT_HTTPHEADER, $headers);

	if ($this->method=="POST") {
		curl_setopt($ch,CURLOPT_POST,1);
		curl_setopt($ch,CURLOPT_POSTFIELDS,$this->post_data);
	} elseif ($this->method=="HEAD") {
		// without this CURL would issue a GET and download the body
		curl_setopt($ch,CURLOPT_NOBODY,1);
	} elseif ($this->method!="GET") {
		// custom() requests: send the caller's verb and body
		curl_setopt($ch,CURLOPT_CUSTOMREQUEST,$this->method);
		if (strlen((string) $this->post_data)) curl_setopt($ch,CURLOPT_POSTFIELDS,$this->post_data);
	}
	if ($this->insecure_ssl) {
		curl_setopt($ch,CURLOPT_SSL_VERIFYPEER,0);
	}
	if ($this->ignore_ssl_hostname) {
		// 0 disables the certificate host name check; the value 1 used
		// previously does not disable it
		curl_setopt($ch,CURLOPT_SSL_VERIFYHOST,0);
	}

	$this->response = curl_exec($ch);
	if (curl_errno($ch)!=0) {
		$this->error = "CURL error #".curl_errno($ch).": ".curl_error($ch);
	} elseif ($this->response===false) {
		$this->error = "CURL error: request failed";
	}
	if ($this->response===false) $this->response = "";

	$this->stats = curl_getinfo($ch);
	curl_close($ch);

	return ($this->error === false);
}
// Passes a progress message to the progress callback, if a callable one is
// set. $level is one of the HRP_* severity constants, $msg the message text.
function progress($level,$msg) {
	// isset() first: the property may have been unset, and reading an unset
	// property raises a notice
	if (isset($this->progress_callback) && is_callable($this->progress_callback)) {
		call_user_func($this->progress_callback,$level,$msg);
	}
}
// Gets any available HTTPRetriever error message (including both internal
// errors and HTTP errors).
//
// Returns $this->error when an internal/connection error is set; otherwise
// a string of the form "HTTP <result code>: <result text>".
function get_error() {
	if (!empty($this->error)) return $this->error;

	// no request may have been made yet, so do not assume these are set
	$code = isset($this->result_code) ? $this->result_code : '';
	$text = isset($this->result_text) ? $this->result_text : '';
	return 'HTTP ' . $code.': '.$text;
}
// Returns the media type of the last response in lower case, without any
// parameters (e.g. "text/html" for "Text/HTML; charset=utf-8"), or an empty
// string if the response had no Content-Type header.
function get_content_type() {
	$ctype = '';
	if (isset($this->response_headers) && is_array($this->response_headers)) {
		if (!empty($this->response_headers['Content-Type'])) {
			$ctype = $this->response_headers['Content-Type'];
		} elseif (!empty($this->response_headers['Content-type'])) {
			$ctype = $this->response_headers['Content-type'];
		} else {
			// header names are case-insensitive; accept any other casing too
			foreach ($this->response_headers as $name=>$value) {
				if (strtolower($name)=='content-type') {
					$ctype = $value;
					break;
				}
			}
		}
	}

	// drop parameters such as "; charset=..."
	list($ctype,) = explode(';',(string) $ctype);

	return strtolower(trim($ctype));
}
// Reports the current transfer counters ($this->received_bytes and
// $this->expected_bytes) to the transfer callback, if a callable one is set.
function update_transfer_counters() {
	// isset() first: the property may have been unset, and reading an unset
	// property raises a notice
	if (isset($this->transfer_callback) && is_callable($this->transfer_callback)) {
		call_user_func($this->transfer_callback,$this->received_bytes,$this->expected_bytes);
	}
}
// Turns the built-in transfer display on or off: when $enabled is TRUE,
// default_transfer_callback() is installed as the transfer callback;
// otherwise the transfer callback is cleared.
function set_transfer_display($enabled = true) {
	if ($enabled) {
		$this->transfer_callback = array(&$this,'default_transfer_callback');
	} else {
		// reset to NULL rather than unset() so the property stays defined
		$this->transfer_callback = null;
	}
}
// Turns the built-in progress display on or off: when $enabled is TRUE,
// default_progress_callback() is installed as the progress callback;
// otherwise the progress callback is cleared.
function set_progress_display($enabled = true) {
	if ($enabled) {
		$this->progress_callback = array(&$this,'default_progress_callback');
	} else {
		// reset to NULL rather than unset() so the property stays defined
		$this->progress_callback = null;
	}
}
// Default progress callback installed by set_progress_display(): echoes one
// line made of a timestamp, the label looked up for $severity in
// $severities, and $message.
// NOTE(review): the entries of the $severities array and the closing lines
// of this method are not present in this copy — confirm against upstream.
1044 function default_progress_callback($severity,$message) {
1045 $severities = array(
1051 echo date('Y-m-d H:i:sa').' ['.$severities[$severity].'] '.$message."\n";
// Default transfer callback installed by set_transfer_display(): echoes a
// timestamped line showing $transferred (and $expected when it is >= 0)
// divided by 1024 and rounded to one decimal, plus a percentage when
// $expected is greater than zero.
// NOTE(review): at least one statement between the two size lines and the
// closing brace are not present in this copy — confirm against upstream.
1055 function default_transfer_callback($transferred,$expected) {
1056 $msg = "Transferred " . round($transferred/1024,1);
1057 if ($expected>=0) $msg .= "/" . round($expected/1024,1);
1059 if ($expected>0) $msg .= " (".round($transferred*100/$expected,1)."%)";
1060 echo date('Y-m-d H:i:sa')." $msg\n";
// Returns the current Unix timestamp as a float with microsecond precision.
// Uses the string form of microtime() ("usec sec") so that it also works on
// PHP versions that lack microtime(true).
function getmicrotime() {
	list($usec, $sec) = explode(" ",microtime());
	return ((float)$usec + (float)$sec);
}