MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / lib / snoopy / Snoopy.class.inc
blob394a3c676d69911a7b6ab55bf06383aaea32e8d7
1 <?php
3 /*************************************************
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2000 ispi, all rights reserved
8 Version: 1.01
9 (Note: v1.2.3 - var $agent="Snoopy v1.2.3";// agent we masquerade as)
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25 You may contact the author of Snoopy by e-mail at:
26 monte@ispi.net
28 Or, write to:
29 Monte Ohrt
30 CTO, ispi
31 237 S. 70th suite 220
32 Lincoln, NE 68510
34 The latest version of Snoopy can be obtained from:
35 http://snoopy.sourceforge.net/
37 *************************************************/
39 class Snoopy
41     /**** Public variables ****/
42     
43     /* user definable vars */
45     var $host            =    "www.php.net";        // host name we are connecting to
46     var $port            =    80;                    // port we are connecting to
47     var $proxy_host        =    "";                    // proxy host to use
48     var $proxy_port        =    "";                    // proxy port to use
49     var $proxy_user        =    "";                    // proxy user to use
50     var $proxy_pass        =    "";                    // proxy password to use
51     
52     var $agent            =    "Snoopy v1.2.3";    // agent we masquerade as
53     var    $referer        =    "";                    // referer info to pass
54     var $cookies        =    array();            // array of cookies to pass
55                                                 // $cookies["username"]="joe";
56     var    $rawheaders        =    array();            // array of raw headers to send
57                                                 // $rawheaders["Content-type"]="text/html";
59     var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
60     var $lastredirectaddr    =    "";                // contains address of last redirected address
61     var    $offsiteok        =    true;                // allows redirection off-site
62     var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
63     var $expandlinks    =    true;                // expand links to fully qualified URLs.
64                                                 // this only applies to fetchlinks()
65                                                 // submitlinks(), and submittext()
66     var $passcookies    =    true;                // pass set cookies back through redirects
67                                                 // NOTE: this currently does not respect
68                                                 // dates, domains or paths.
69     
70     var    $user            =    "";                    // user for http authentication
71     var    $pass            =    "";                    // password for http authentication
72     
73     // http accept types
74     var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
75     
76     var $results        =    "";                    // where the content is put
77         
78     var $error            =    "";                    // error messages sent here
79     var    $response_code    =    "";                    // response code returned from server
80     var    $headers        =    array();            // headers returned from server sent here
81     var    $maxlength        =    500000;                // max return data length (body)
82     var $read_timeout    =    0;                    // timeout on read operations, in seconds
83                                                 // supported only since PHP 4 Beta 4
84                                                 // set to 0 to disallow timeouts
85     var $timed_out        =    false;                // if a read operation timed out
86     var    $status            =    0;                    // http request status
88     var $temp_dir        =    "/tmp";                // temporary directory that the webserver
89                                                 // has permission to write to.
90                                                 // under Windows, this should be C:\temp
92     var    $curl_path        =    "/usr/local/bin/curl";
93                                                 // Snoopy will use cURL for fetching
94                                                 // SSL content if a full system path to
95                                                 // the cURL binary is supplied here.
96                                                 // set to false if you do not have
97                                                 // cURL installed. See http://curl.haxx.se
98                                                 // for details on installing cURL.
99                                                 // Snoopy does *not* use the cURL
100                                                 // library functions built into php,
101                                                 // as these functions are not stable
102                                                 // as of this Snoopy release.
103     
104     /**** Private variables ****/    
105     
106     var    $_maxlinelen    =    4096;                // max line length (headers)
107     
108     var $_httpmethod    =    "GET";                // default http request method
109     var $_httpversion    =    "HTTP/1.0";            // default http request version
110     var $_submit_method    =    "POST";                // default submit method
111     var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
112     var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
113     var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
114     var $_redirectdepth    =    0;                    // increments on an http redirect
115     var $_frameurls        =     array();            // frame src urls
116     var $_framedepth    =    0;                    // increments on frame depth
117     
118     var $_isproxy        =    false;                // set if using a proxy server
119     var $_fp_timeout    =    30;                    // timeout for socket connection
/*======================================================================*\
    Function:    fetch
    Purpose:    fetch the contents of a web page over http or https,
                then follow a pending redirect (bounded by $maxredirs)
                and any collected frame urls (bounded by $maxframes)
    Input:        $URI    the location of the page to fetch
    Output:        $this->results    the output text from the fetch
                returns false when the protocol is not http/https
                ($this->error is set), when the http connection
                cannot be opened, or when https is requested and
                $curl_path is unset or not executable; true otherwise
\*======================================================================*/

    function fetch($URI)
    {
        $URI_PARTS = parse_url($URI);
        // parse_url() returns false for seriously malformed URLs
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();

        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];

        // default every part read below so a partial URL raises no notices
        foreach (array("scheme", "host") as $part)
        {
            if (!isset($URI_PARTS[$part]))
                $URI_PARTS[$part] = '';
        }
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';

        $scheme = strtolower($URI_PARTS["scheme"]);
        if ($scheme != "http" && $scheme != "https")
        {
            // not a valid protocol; the trailing "\n" must be double-quoted
            // to be a real newline
            $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"]."\"\n";
            return false;
        }

        if ($scheme == "https")
        {
            // https requests are handed to the external cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
        }

        $this->host = $URI_PARTS["host"];
        if (!empty($URI_PARTS["port"]))
            $this->port = $URI_PARTS["port"];

        // using proxy, send entire URI; no proxy, send only the path
        if ($this->_isproxy)
            $target = $URI;
        else
            $target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

        if ($scheme == "http")
        {
            if (!$this->_connect($fp))
                return false;
            $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
        }
        else
        {
            $this->_httpsrequest($target, $URI, $this->_httpmethod);
        }

        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
            // On-site means the same host over http or https. The host must
            // be followed by a port, path, query, fragment or end of string,
            // so "host.evil.com" is not mistaken for "host".
            $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);

            // only follow redirect if it's on this site, or offsiteok is true
            if ($onsite || $this->offsiteok)
            {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            // work on a copy: the nested fetch() calls refill _frameurls
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            foreach ($frameurls as $frameurl)
            {
                if ($this->_framedepth >= $this->maxframes)
                    break;
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }

        return true;
    }
/*======================================================================*\
    Function:    submit
    Purpose:    submit an http form over http or https, then follow a
                pending redirect (bounded by $maxredirs) and any
                collected frame urls (bounded by $maxframes)
    Input:        $URI    the location to post the data
                $formvars    the formvars to use.
                    format: $formvars["var"] = "val";
                $formfiles  an array of files to submit
                    format: $formfiles["var"] = "/dir/filename.ext";
    Output:        $this->results    the text output from the post
                returns false when the protocol is not http/https
                ($this->error is set), when the http connection
                cannot be opened, or when https is requested and
                $curl_path is unset or not executable; true otherwise
\*======================================================================*/

    function submit($URI, $formvars="", $formfiles="")
    {
        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        $URI_PARTS = parse_url($URI);
        // parse_url() returns false for seriously malformed URLs
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();

        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];

        // default every part read below so a partial URL raises no notices
        foreach (array("scheme", "host") as $part)
        {
            if (!isset($URI_PARTS[$part]))
                $URI_PARTS[$part] = '';
        }
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';

        $scheme = strtolower($URI_PARTS["scheme"]);
        if ($scheme != "http" && $scheme != "https")
        {
            // not a valid protocol; the trailing "\n" must be double-quoted
            // to be a real newline
            $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"]."\"\n";
            return false;
        }

        if ($scheme == "https")
        {
            // https requests are handed to the external cURL binary
            if (!$this->curl_path)
                return false;
            if (function_exists("is_executable") && !is_executable($this->curl_path))
                return false;
        }

        $this->host = $URI_PARTS["host"];
        if (!empty($URI_PARTS["port"]))
            $this->port = $URI_PARTS["port"];

        // using proxy, send entire URI; no proxy, send only the path
        if ($this->_isproxy)
            $target = $URI;
        else
            $target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

        if ($scheme == "http")
        {
            if (!$this->_connect($fp))
                return false;
            $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
            $this->_disconnect($fp);
        }
        else
        {
            $this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
        }

        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
            // Only a redirect without an http(s) scheme is treated as
            // relative and resolved against the submitted site; an absolute
            // URL on the other scheme must not be rewritten.
            if (!preg_match('~^https?://~i', $this->_redirectaddr))
                $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

            // On-site means the same host over http or https. The host must
            // be followed by a port, path, query, fragment or end of string,
            // so "host.evil.com" is not mistaken for "host".
            $onsite = preg_match('~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i', $this->_redirectaddr);

            // only follow redirect if it's on this site, or offsiteok is true
            if ($onsite || $this->offsiteok)
            {
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                if (strpos($this->_redirectaddr, "?") > 0)
                    $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                else
                    $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            // work on a copy: the nested fetch() calls refill _frameurls
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            foreach ($frameurls as $frameurl)
            {
                if ($this->_framedepth >= $this->maxframes)
                    break;
                $this->fetch($frameurl);
                $this->_framedepth++;
            }
        }

        return true;
    }
/*======================================================================*\
    Function:    fetchlinks
    Purpose:    fetch the links from a web page
    Input:        $URI    where you are fetching from
    Output:        $this->results    an array of the URLs; when the fetch
                produced an array of documents, one array of URLs per
                document
                returns false when the fetch fails, true otherwise
\*======================================================================*/

    function fetchlinks($URI)
    {
        if (!$this->fetch($URI))
            return false;

        // links are expanded relative to the last redirect target, if any
        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results))
        {
            // Strip and expand each document separately, as submitlinks()
            // does, so _expandlinks() receives one flat list of links at a
            // time rather than an array of link arrays.
            for ($x = 0; $x < count($this->results); $x++)
            {
                $this->results[$x] = $this->_striplinks($this->results[$x]);
                if ($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
        else
        {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
    }
/*======================================================================*\
    Function:    fetchform
    Purpose:    fetch a web page and keep only its form elements
    Input:        $URI    where you are fetching from
    Output:        $this->results    the resulting html form; when the
                fetch produced an array of documents, each entry is
                reduced in place
                returns false when the fetch fails, true otherwise
\*======================================================================*/

    function fetchform($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if (!is_array($this->results))
        {
            // single document
            $this->results = $this->_stripform($this->results);
            return true;
        }

        // several documents: reduce each one in place
        for ($i = 0; $i < count($this->results); ++$i)
        {
            $this->results[$i] = $this->_stripform($this->results[$i]);
        }

        return true;
    }
490     
491     
/*======================================================================*\
    Function:    fetchtext
    Purpose:    fetch a web page and reduce it to its text
    Input:        $URI    where you are fetching from
    Output:        $this->results    the text from the web page; when the
                fetch produced an array of documents, each entry is
                reduced in place
                returns false when the fetch fails, true otherwise
\*======================================================================*/

    function fetchtext($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if (!is_array($this->results))
        {
            // single document
            $this->results = $this->_striptext($this->results);
            return true;
        }

        // several documents: reduce each one in place
        for ($i = 0; $i < count($this->results); ++$i)
        {
            $this->results[$i] = $this->_striptext($this->results[$i]);
        }

        return true;
    }
/*======================================================================*\
    Function:    submitlinks
    Purpose:    submit a form and grab the links from the response
    Input:        $URI    where you are submitting from
                $formvars    form variables, passed through to submit()
                $formfiles    form files, passed through to submit()
    Output:        $this->results    an array of the links from the post
                returns false when the submit fails, true otherwise
\*======================================================================*/

    function submitlinks($URI, $formvars="", $formfiles="")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // links are expanded relative to the last redirect target, if any
        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (!is_array($this->results))
        {
            // single document
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
            return true;
        }

        // several documents: process each one in place
        for ($i = 0; $i < count($this->results); ++$i)
        {
            $this->results[$i] = $this->_striplinks($this->results[$i]);
            if ($this->expandlinks)
                $this->results[$i] = $this->_expandlinks($this->results[$i], $URI);
        }

        return true;
    }
/*======================================================================*\
    Function:    submittext
    Purpose:    submit a form and grab the text from the response
    Input:        $URI    where you are submitting from
                $formvars    form variables, passed through to submit()
                $formfiles    form files, passed through to submit()
    Output:        $this->results    the text from the web page
                returns false when the submit fails, true otherwise
\*======================================================================*/

    function submittext($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // expansion is done relative to the last redirect target, if any
        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (!is_array($this->results))
        {
            // single document
            $this->results = $this->_striptext($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
            return true;
        }

        // several documents: process each one in place
        for ($i = 0; $i < count($this->results); ++$i)
        {
            $this->results[$i] = $this->_striptext($this->results[$i]);
            if ($this->expandlinks)
                $this->results[$i] = $this->_expandlinks($this->results[$i], $URI);
        }

        return true;
    }
584     
/*======================================================================*\
    Function:    set_submit_multipart
    Purpose:    Switch the content type used for form submissions to
                multipart/form-data
    Output:        $this->_submit_type is updated; nothing is returned
\*======================================================================*/
    function set_submit_multipart()
    {
        $multipart_type = "multipart/form-data";
        $this->_submit_type = $multipart_type;
    }
596     
/*======================================================================*\
    Function:    set_submit_normal
    Purpose:    Switch the content type used for form submissions to
                application/x-www-form-urlencoded
    Output:        $this->_submit_type is updated; nothing is returned
\*======================================================================*/
    function set_submit_normal()
    {
        $urlencoded_type = "application/x-www-form-urlencoded";
        $this->_submit_type = $urlencoded_type;
    }
607     
608     
610 /*======================================================================*\
611     Private functions
612 \*======================================================================*/
613     
614     
/*======================================================================*\
    Function:    _striplinks
    Purpose:    collect the href values of the anchors in an html
                document
    Input:        $document    document to strip.
    Output:        $match        an array of the links: quoted href values
                first, then unquoted ones; an empty array when the
                document has no links
\*======================================================================*/

    function _striplinks($document)
    {
        preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
                        ([\"\'])?                    # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                        'isx",$document,$links);

        // always hand back an array, even when nothing matched
        $match = array();

        // catenate the non-empty matches from the conditional subpattern:
        // group 2 holds quoted href values, group 3 unquoted ones
        foreach (array(2, 3) as $group)
        {
            if (empty($links[$group]))
                continue;

            foreach ($links[$group] as $val)
            {
                if (!empty($val))
                    $match[] = $val;
            }
        }

        // return the links
        return $match;
    }
/*======================================================================*\
    Function:    _stripform
    Purpose:    keep only the form-related tags (form, input, select,
                textarea, option) of an html document
    Input:        $document    document to strip.
    Output:        the matched elements as one string, joined with "\r\n"
\*======================================================================*/

    function _stripform($document)
    {
        $form_tags = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

        preg_match_all($form_tags, $document, $elements);

        // index 0 holds the full text of every match; catenate them
        return implode("\r\n", $elements[0]);
    }
667     
668     
669 /*======================================================================*\
670     Function:    _striptext
671     Purpose:    strip the text from an html document
672     Input:        $document    document to strip.
673     Output:        $text        the resulting text
674 \*======================================================================*/
676     function _striptext($document)
677     {
678         
679         // I didn't use preg eval (//e) since that is only available in PHP 4.0.
680         // so, list your entities one by one here. I included some of the
681         // more common ones.
682                                 
683         $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
684                         "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
685                         "'([\r\n])[\s]+'",                    // strip out white space
686                         "'&(quot|#34|#034|#x22);'i",        // replace html entities
687                         "'&(amp|#38|#038|#x26);'i",            // added hexadecimal values
688                         "'&(lt|#60|#060|#x3c);'i",
689                         "'&(gt|#62|#062|#x3e);'i",
690                         "'&(nbsp|#160|#xa0);'i",
691                         "'&(iexcl|#161);'i",
692                         "'&(cent|#162);'i",
693                         "'&(pound|#163);'i",
694                         "'&(copy|#169);'i",
695                         "'&(reg|#174);'i",
696                         "'&(deg|#176);'i",
697                         "'&(#39|#039|#x27);'",
698                         "'&(euro|#8364);'i",                // europe
699                         "'&a(uml|UML);'",                    // german
700                         "'&o(uml|UML);'",
701                         "'&u(uml|UML);'",
702                         "'&A(uml|UML);'",
703                         "'&O(uml|UML);'",
704                         "'&U(uml|UML);'",
705                         "'&szlig;'i",
706                         );
707         $replace = array(    "",
708                             "",
709                             "\\1",
710                             "\"",
711                             "&",
712                             "<",
713                             ">",
714                             " ",
715                             chr(161),
716                             chr(162),
717                             chr(163),
718                             chr(169),
719                             chr(174),
720                             chr(176),
721                             chr(39),
722                             chr(128),
723                             "ä",
724                             "ö",
725                             "ü",
726                             "Ä",
727                             "Ö",
728                             "Ãœ",
729                             "ß",
730                         );
731                     
732         $text = preg_replace($search,$replace,$document);
733                                 
734         return $text;
735     }
737 /*======================================================================*\
738     Function:    _expandlinks
739     Purpose:    expand each link into a fully qualified URL
740     Input:        $links            the links to qualify
741                 $URI            the full URI to get the base from
742     Output:        $expandedLinks    the expanded links
743 \*======================================================================*/
745     function _expandlinks($links,$URI)
746     {
747         
748         preg_match("/^[^\?]+/",$URI,$match);
750         $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
751         $match = preg_replace("|/$|","",$match);
752         $match_part = parse_url($match);
753         $match_root =
754         $match_part["scheme"]."://".$match_part["host"];
755                 
756         $search = array(     "|^http://".preg_quote($this->host)."|i",
757                             "|^(\/)|i",
758                             "|^(?!http://)(?!mailto:)|i",
759                             "|/\./|",
760                             "|/[^\/]+/\.\./|"
761                         );
762                         
763         $replace = array(    "",
764                             $match_root."/",
765                             $match."/",
766                             "/",
767                             "/"
768                         );            
769                 
770         $expandedLinks = preg_replace($search,$replace,$links);
772         return $expandedLinks;
773     }
775 /*======================================================================*\
776     Function:    _httprequest
777     Purpose:    go get the http data from the server
778     Input:        $url        the url to fetch
779                 $fp            the current open file pointer
780                 $URI        the full URI
781                 $body        body contents to send if any (POST)
782     Output:        true on success, false on a read timeout (status is set to -100)
783 \*======================================================================*/
784     
785     function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
786     {
787         $cookie_headers = '';
788         if($this->passcookies && $this->_redirectaddr)
789             $this->setcookies();
790             
791         $URI_PARTS = parse_url($URI);
792         if(empty($url))
793             $url = "/";
794         $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";        
795         if(!empty($this->agent))
796             $headers .= "User-Agent: ".$this->agent."\r\n";
797         if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
798             $headers .= "Host: ".$this->host;
799             if(!empty($this->port))
800                 $headers .= ":".$this->port;
801             $headers .= "\r\n";
802         }
803         if(!empty($this->accept))
804             $headers .= "Accept: ".$this->accept."\r\n";
805         if(!empty($this->referer))
806             $headers .= "Referer: ".$this->referer."\r\n";
807         if(!empty($this->cookies))
808         {            
809             if(!is_array($this->cookies))
810                 $this->cookies = (array)$this->cookies;
811     
812             reset($this->cookies);
813             if ( count($this->cookies) > 0 ) {
814                 $cookie_headers .= 'Cookie: ';
815                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
816                 $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
817                 }
818                 $headers .= substr($cookie_headers,0,-2) . "\r\n";
819             } 
820         }
821         if(!empty($this->rawheaders))
822         {
823             if(!is_array($this->rawheaders))
824                 $this->rawheaders = (array)$this->rawheaders;
825             while(list($headerKey,$headerVal) = each($this->rawheaders))
826                 $headers .= $headerKey.": ".$headerVal."\r\n";
827         }
828         if(!empty($content_type)) {
829             $headers .= "Content-type: $content_type";
830             if ($content_type == "multipart/form-data")
831                 $headers .= "; boundary=".$this->_mime_boundary;
832             $headers .= "\r\n";
833         }
834         if(!empty($body))    
835             $headers .= "Content-length: ".strlen($body)."\r\n";
836         if(!empty($this->user) || !empty($this->pass))    
837             $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
838         
839         //add proxy auth headers
840         if(!empty($this->proxy_user))    
841             $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
844         $headers .= "\r\n";
845         
846         // set the read timeout if needed
847         if ($this->read_timeout > 0)
848             socket_set_timeout($fp, $this->read_timeout);
849         $this->timed_out = false;
850         
851         fwrite($fp,$headers.$body,strlen($headers.$body));
852         
853         $this->_redirectaddr = false;
854         unset($this->headers);
855                         
856         while($currentHeader = fgets($fp,$this->_maxlinelen))
857         {
858             if ($this->read_timeout > 0 && $this->_check_timeout($fp))
859             {
860                 $this->status=-100;
861                 return false;
862             }
863                 
864             if($currentHeader == "\r\n")
865                 break;
866                         
867             // if a header begins with Location: or URI:, set the redirect
868             if(preg_match("/^(Location:|URI:)/i",$currentHeader))
869             {
870                 // get URL portion of the redirect
871                 preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
872                 // look for :// in the Location header to see if hostname is included
873                 if(!preg_match("|\:\/\/|",$matches[2]))
874                 {
875                     // no host in the path, so prepend
876                     $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
877                     // eliminate double slash
878                     if(!preg_match("|^/|",$matches[2]))
879                             $this->_redirectaddr .= "/".$matches[2];
880                     else
881                             $this->_redirectaddr .= $matches[2];
882                 }
883                 else
884                     $this->_redirectaddr = $matches[2];
885             }
886         
887             if(preg_match("|^HTTP/|",$currentHeader))
888             {
889                 if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
890                 {
891                     $this->status= $status[1];
892                 }                
893                 $this->response_code = $currentHeader;
894             }
895                 
896             $this->headers[] = $currentHeader;
897         }
899         $results = '';
900         do {
901             $_data = fread($fp, $this->maxlength);
902             if (strlen($_data) == 0) {
903                 break;
904             }
905             $results .= $_data;
906         } while(true);
908         if ($this->read_timeout > 0 && $this->_check_timeout($fp))
909         {
910             $this->status=-100;
911             return false;
912         }
913         
914         // check if there is a a redirect meta tag
915         
916         if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
918         {
919             $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
920         }
922         // have we hit our frame depth and is there frame src to fetch?
923         if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
924         {
925             $this->results[] = $results;
926             for($x=0; $x<count($match[1]); $x++)
927                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
928         }
929         // have we already fetched framed content?
930         elseif(is_array($this->results))
931             $this->results[] = $results;
932         // no framed content
933         else
934             $this->results = $results;
935         
936         return true;
937     }
939 /*======================================================================*\
940     Function:    _httpsrequest
941     Purpose:    go get the https data from the server using curl
942     Input:        $url        the url to fetch
943                 $URI        the full URI
944                 $body        body contents to send if any (POST)
945     Output:        true on success, false when cURL exits with an error
946 \*======================================================================*/
947     
948     function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
949     {
950         if($this->passcookies && $this->_redirectaddr)
951             $this->setcookies();
953         $headers = array();        
954                     
955         $URI_PARTS = parse_url($URI);
956         if(empty($url))
957             $url = "/";
958         // GET ... header not needed for curl
959         //$headers[] = $http_method." ".$url." ".$this->_httpversion;        
960         if(!empty($this->agent))
961             $headers[] = "User-Agent: ".$this->agent;
962         if(!empty($this->host))
963             if(!empty($this->port))
964                 $headers[] = "Host: ".$this->host.":".$this->port;
965             else
966                 $headers[] = "Host: ".$this->host;
967         if(!empty($this->accept))
968             $headers[] = "Accept: ".$this->accept;
969         if(!empty($this->referer))
970             $headers[] = "Referer: ".$this->referer;
971         if(!empty($this->cookies))
972         {            
973             if(!is_array($this->cookies))
974                 $this->cookies = (array)$this->cookies;
975     
976             reset($this->cookies);
977             if ( count($this->cookies) > 0 ) {
978                 $cookie_str = 'Cookie: ';
979                 foreach ( $this->cookies as $cookieKey => $cookieVal ) {
980                 $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
981                 }
982                 $headers[] = substr($cookie_str,0,-2);
983             }
984         }
985         if(!empty($this->rawheaders))
986         {
987             if(!is_array($this->rawheaders))
988                 $this->rawheaders = (array)$this->rawheaders;
989             while(list($headerKey,$headerVal) = each($this->rawheaders))
990                 $headers[] = $headerKey.": ".$headerVal;
991         }
992         if(!empty($content_type)) {
993             if ($content_type == "multipart/form-data")
994                 $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
995             else
996                 $headers[] = "Content-type: $content_type";
997         }
998         if(!empty($body))    
999             $headers[] = "Content-length: ".strlen($body);
1000         if(!empty($this->user) || !empty($this->pass))    
1001             $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
1002             
1003         for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
1004             $safer_header = strtr( $headers[$curr_header], "\"", " " );
1005             $cmdline_params .= " -H \"".$safer_header."\"";
1006         }
1007         
1008         if(!empty($body))
1009             $cmdline_params .= " -d \"$body\"";
1010         
1011         if($this->read_timeout > 0)
1012             $cmdline_params .= " -m ".$this->read_timeout;
1013         
1014         $headerfile = tempnam($temp_dir, "sno");
1016         $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access
1017         exec($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\"",$results,$return);
1018         
1019         if($return)
1020         {
1021             $this->error = "Error: cURL could not retrieve the document, error $return.";
1022             return false;
1023         }
1024             
1025             
1026         $results = implode("\r\n",$results);
1027         
1028         $result_headers = file("$headerfile");
1029                         
1030         $this->_redirectaddr = false;
1031         unset($this->headers);
1032                         
1033         for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1034         {
1035             
1036             // if a header begins with Location: or URI:, set the redirect
1037             if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1038             {
1039                 // get URL portion of the redirect
1040                 preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1041                 // look for :// in the Location header to see if hostname is included
1042                 if(!preg_match("|\:\/\/|",$matches[2]))
1043                 {
1044                     // no host in the path, so prepend
1045                     $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1046                     // eliminate double slash
1047                     if(!preg_match("|^/|",$matches[2]))
1048                             $this->_redirectaddr .= "/".$matches[2];
1049                     else
1050                             $this->_redirectaddr .= $matches[2];
1051                 }
1052                 else
1053                     $this->_redirectaddr = $matches[2];
1054             }
1055         
1056             if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1057                 $this->response_code = $result_headers[$currentHeader];
1059             $this->headers[] = $result_headers[$currentHeader];
1060         }
1062         // check if there is a a redirect meta tag
1063         
1064         if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1065         {
1066             $this->_redirectaddr = $this->_expandlinks($match[1],$URI);    
1067         }
1069         // have we hit our frame depth and is there frame src to fetch?
1070         if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1071         {
1072             $this->results[] = $results;
1073             for($x=0; $x<count($match[1]); $x++)
1074                 $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1075         }
1076         // have we already fetched framed content?
1077         elseif(is_array($this->results))
1078             $this->results[] = $results;
1079         // no framed content
1080         else
1081             $this->results = $results;
1083         unlink("$headerfile");
1084         
1085         return true;
1086     }
1088 /*======================================================================*\
1089     Function:    setcookies()
1090     Purpose:    set cookies for a redirection
1091 \*======================================================================*/
1092     
1093     function setcookies()
1094     {
1095         for($x=0; $x<count($this->headers); $x++)
1096         {
1097         if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1098             $this->cookies[$match[1]] = urldecode($match[2]);
1099         }
1100     }
1102     
1103 /*======================================================================*\
1104     Function:    _check_timeout
1105     Purpose:    checks whether timeout has occurred
1106     Input:        $fp    file pointer
1107 \*======================================================================*/
1109     function _check_timeout($fp)
1110     {
1111         if ($this->read_timeout > 0) {
1112             $fp_status = socket_get_status($fp);
1113             if ($fp_status["timed_out"]) {
1114                 $this->timed_out = true;
1115                 return true;
1116             }
1117         }
1118         return false;
1119     }
1121 /*======================================================================*\
1122     Function:    _connect
1123     Purpose:    make a socket connection
1124     Input:        $fp    file pointer
1125 \*======================================================================*/
1126     
1127     function _connect(&$fp)
1128     {
1129         if(!empty($this->proxy_host) && !empty($this->proxy_port))
1130             {
1131                 $this->_isproxy = true;
1132                 
1133                 $host = $this->proxy_host;
1134                 $port = $this->proxy_port;
1135             }
1136         else
1137         {
1138             $host = $this->host;
1139             $port = $this->port;
1140         }
1141     
1142         $this->status = 0;
1143         
1144         if($fp = fsockopen(
1145                     $host,
1146                     $port,
1147                     $errno,
1148                     $errstr,
1149                     $this->_fp_timeout
1150                     ))
1151         {
1152             // socket connection succeeded
1154             return true;
1155         }
1156         else
1157         {
1158             // socket connection failed
1159             $this->status = $errno;
1160             switch($errno)
1161             {
1162                 case -3:
1163                     $this->error="socket creation failed (-3)";
1164                 case -4:
1165                     $this->error="dns lookup failure (-4)";
1166                 case -5:
1167                     $this->error="connection refused or timed out (-5)";
1168                 default:
1169                     $this->error="connection failed (".$errno.")";
1170             }
1171             return false;
1172         }
1173     }
1174 /*======================================================================*\
1175     Function:    _disconnect
1176     Purpose:    disconnect a socket connection
1177     Input:        $fp    file pointer
1178 \*======================================================================*/
1179     
1180     function _disconnect($fp)
1181     {
1182         return(fclose($fp));
1183     }
1185     
1186 /*======================================================================*\
1187     Function:    _prepare_post_body
1188     Purpose:    Prepare post body according to encoding type
1189     Input:        $formvars  - form variables
1190                 $formfiles - form upload files
1191     Output:        post body
1192 \*======================================================================*/
1193     
1194     function _prepare_post_body($formvars, $formfiles)
1195     {
1196         settype($formvars, "array");
1197         settype($formfiles, "array");
1198         $postdata = '';
1200         if (count($formvars) == 0 && count($formfiles) == 0)
1201             return;
1202         
1203         switch ($this->_submit_type) {
1204             case "application/x-www-form-urlencoded":
1205                 reset($formvars);
1206                 while(list($key,$val) = each($formvars)) {
1207                     if (is_array($val) || is_object($val)) {
1208                         while (list($cur_key, $cur_val) = each($val)) {
1209                             $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1210                         }
1211                     } else
1212                         $postdata .= urlencode($key)."=".urlencode($val)."&";
1213                 }
1214                 break;
1216             case "multipart/form-data":
1217                 $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1218                 
1219                 reset($formvars);
1220                 while(list($key,$val) = each($formvars)) {
1221                     if (is_array($val) || is_object($val)) {
1222                         while (list($cur_key, $cur_val) = each($val)) {
1223                             $postdata .= "--".$this->_mime_boundary."\r\n";
1224                             $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1225                             $postdata .= "$cur_val\r\n";
1226                         }
1227                     } else {
1228                         $postdata .= "--".$this->_mime_boundary."\r\n";
1229                         $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1230                         $postdata .= "$val\r\n";
1231                     }
1232                 }
1233                 
1234                 reset($formfiles);
1235                 while (list($field_name, $file_names) = each($formfiles)) {
1236                     settype($file_names, "array");
1237                     while (list(, $file_name) = each($file_names)) {
1238                         if (!is_readable($file_name)) continue;
1240                         $fp = fopen($file_name, "r");
1241                         $file_content = fread($fp, filesize($file_name));
1242                         fclose($fp);
1243                         $base_name = basename($file_name);
1245                         $postdata .= "--".$this->_mime_boundary."\r\n";
1246                         $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1247                         $postdata .= "$file_content\r\n";
1248                     }
1249                 }
1250                 $postdata .= "--".$this->_mime_boundary."--\r\n";
1251                 break;
1252         }
1254         return $postdata;
1255     }