[ Index ]

PHP Cross Reference of WordPress Trunk (Updated Daily)

title

Body

[close]

/wp-includes/ -> class-snoopy.php (source)

   1  <?php
   2  
   3  /**
   4   * Deprecated. Use WP_HTTP (http.php) instead.
   5   */
   6  _deprecated_file( basename( __FILE__ ), '3.0.0', WPINC . '/http.php' );
   7  
   8  if ( ! class_exists( 'Snoopy', false ) ) :
   9  /*************************************************
  10  
  11  Snoopy - the PHP net client
  12  Author: Monte Ohrt <monte@ispi.net>
  13  Copyright (c): 1999-2008 New Digital Group, all rights reserved
  14  Version: 1.2.4
  15  
  16   * This library is free software; you can redistribute it and/or
  17   * modify it under the terms of the GNU Lesser General Public
  18   * License as published by the Free Software Foundation; either
  19   * version 2.1 of the License, or (at your option) any later version.
  20   *
  21   * This library is distributed in the hope that it will be useful,
  22   * but WITHOUT ANY WARRANTY; without even the implied warranty of
  23   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  24   * Lesser General Public License for more details.
  25   *
  26   * You should have received a copy of the GNU Lesser General Public
  27   * License along with this library; if not, write to the Free Software
  28   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  29  
  30  You may contact the author of Snoopy by e-mail at:
  31  monte@ohrt.com
  32  
  33  The latest version of Snoopy can be obtained from:
  34  http://snoopy.sourceforge.net/
  35  
  36  *************************************************/
  37  
  38  class Snoopy
  39  {
    /**** Public variables ****/

    /* user definable vars */

    var $host            =    "www.php.net";        // host name we are connecting to;
                                                // fetch() and submit() replace it with the host of the URI
    var $port            =    80;                    // port we are connecting to;
                                                // replaced when the URI carries an explicit port
    var $proxy_host        =    "";                    // proxy host to use
    var $proxy_port        =    "";                    // proxy port to use
    var $proxy_user        =    "";                    // proxy user to use
    var $proxy_pass        =    "";                    // proxy password to use

    var $agent            =    "Snoopy v1.2.4";    // agent we masquerade as
    var    $referer        =    "";                    // referer info to pass
    var $cookies        =    array();            // array of cookies to pass
                                                // $cookies["username"]="joe";
    var    $rawheaders        =    array();            // array of raw headers to send
                                                // $rawheaders["Content-type"]="text/html";

    var $maxredirs        =    5;                    // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr    =    "";                // address of the most recent redirect that was followed
    var    $offsiteok        =    true;                // allows redirection off-site
    var $maxframes        =    0;                    // frame content depth maximum. 0 = disallow
    var $expandlinks    =    true;                // expand links to fully qualified URLs.
                                                // this only applies to fetchlinks(),
                                                // submitlinks(), and submittext()
    var $passcookies    =    true;                // pass set cookies back through redirects
                                                // NOTE: this currently does not respect
                                                // dates, domains or paths.

    var    $user            =    "";                    // user for http authentication;
                                                // replaced by a user embedded in the URI
    var    $pass            =    "";                    // password for http authentication;
                                                // replaced by a password embedded in the URI

    // http accept types
    var $accept            =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    var $results        =    "";                    // where the content is put

    var $error            =    "";                    // error messages sent here
    var    $response_code    =    "";                    // response code returned from server
    var    $headers        =    array();            // headers returned from server sent here
    var    $maxlength        =    500000;                // max return data length (body)
    var $read_timeout    =    0;                    // timeout on read operations, in seconds
                                                // supported only since PHP 4 Beta 4
                                                // set to 0 to disallow timeouts
    var $timed_out        =    false;                // if a read operation timed out
    var    $status            =    0;                    // http request status

    var $temp_dir        =    "/tmp";                // temporary directory that the webserver
                                                // has permission to write to.
                                                // under Windows, this should be C:\temp

    var    $curl_path        =    "/usr/local/bin/curl";
                                                // Snoopy will use cURL for fetching
                                                // SSL content if a full system path to
                                                // the cURL binary is supplied here.
                                                // set to false if you do not have
                                                // cURL installed; https requests then
                                                // fail (fetch()/submit() return false).
                                                // See http://curl.haxx.se
                                                // for details on installing cURL.
                                                // Snoopy does *not* use the cURL
                                                // library functions built into php,
                                                // as these functions are not stable
                                                // as of this Snoopy release.

    /**** Private variables ****/

    var    $_maxlinelen    =    4096;                // max line length (headers)

    var $_httpmethod    =    "GET";                // default http request method (used by fetch())
    var $_httpversion    =    "HTTP/1.0";            // default http request version
    var $_submit_method    =    "POST";                // default submit method (used by submit())
    var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
    var $_mime_boundary    =   "";                    // MIME boundary for multipart/form-data submit type
    var $_redirectaddr    =    false;                // will be set if page fetched is a redirect
    var $_redirectdepth    =    0;                    // increments on an http redirect
    var $_frameurls        =     array();            // frame src urls
    var $_framedepth    =    0;                    // increments on frame depth

    var $_isproxy        =    false;                // set if using a proxy server
    var $_fp_timeout    =    30;                    // timeout for socket connection
 119  
 120  /*======================================================================*\
 121      Function:    fetch
 122      Purpose:    fetch the contents of a web page
 123                  (and possibly other protocols in the
 124                  future like ftp, nntp, gopher, etc.)
 125      Input:        $URI    the location of the page to fetch
 126      Output:        $this->results    the output text from the fetch
 127  \*======================================================================*/
 128  
 129  	function fetch($URI)
 130      {
 131  
 132          //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
 133          $URI_PARTS = parse_url($URI);
 134          if (!empty($URI_PARTS["user"]))
 135              $this->user = $URI_PARTS["user"];
 136          if (!empty($URI_PARTS["pass"]))
 137              $this->pass = $URI_PARTS["pass"];
 138          if (empty($URI_PARTS["query"]))
 139              $URI_PARTS["query"] = '';
 140          if (empty($URI_PARTS["path"]))
 141              $URI_PARTS["path"] = '';
 142  
 143          switch(strtolower($URI_PARTS["scheme"]))
 144          {
 145              case "http":
 146                  $this->host = $URI_PARTS["host"];
 147                  if(!empty($URI_PARTS["port"]))
 148                      $this->port = $URI_PARTS["port"];
 149                  if($this->_connect($fp))
 150                  {
 151                      if($this->_isproxy)
 152                      {
 153                          // using proxy, send entire URI
 154                          $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
 155                      }
 156                      else
 157                      {
 158                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 159                          // no proxy, send only the path
 160                          $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
 161                      }
 162  
 163                      $this->_disconnect($fp);
 164  
 165                      if($this->_redirectaddr)
 166                      {
 167                          /* url was redirected, check if we've hit the max depth */
 168                          if($this->maxredirs > $this->_redirectdepth)
 169                          {
 170                              // only follow redirect if it's on this site, or offsiteok is true
 171                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 172                              {
 173                                  /* follow the redirect */
 174                                  $this->_redirectdepth++;
 175                                  $this->lastredirectaddr=$this->_redirectaddr;
 176                                  $this->fetch($this->_redirectaddr);
 177                              }
 178                          }
 179                      }
 180  
 181                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 182                      {
 183                          $frameurls = $this->_frameurls;
 184                          $this->_frameurls = array();
 185  
 186                          while(list(,$frameurl) = each($frameurls))
 187                          {
 188                              if($this->_framedepth < $this->maxframes)
 189                              {
 190                                  $this->fetch($frameurl);
 191                                  $this->_framedepth++;
 192                              }
 193                              else
 194                                  break;
 195                          }
 196                      }
 197                  }
 198                  else
 199                  {
 200                      return false;
 201                  }
 202                  return true;
 203                  break;
 204              case "https":
 205                  if(!$this->curl_path)
 206                      return false;
 207                  if(function_exists("is_executable"))
 208                      if (!is_executable($this->curl_path))
 209                          return false;
 210                  $this->host = $URI_PARTS["host"];
 211                  if(!empty($URI_PARTS["port"]))
 212                      $this->port = $URI_PARTS["port"];
 213                  if($this->_isproxy)
 214                  {
 215                      // using proxy, send entire URI
 216                      $this->_httpsrequest($URI,$URI,$this->_httpmethod);
 217                  }
 218                  else
 219                  {
 220                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 221                      // no proxy, send only the path
 222                      $this->_httpsrequest($path, $URI, $this->_httpmethod);
 223                  }
 224  
 225                  if($this->_redirectaddr)
 226                  {
 227                      /* url was redirected, check if we've hit the max depth */
 228                      if($this->maxredirs > $this->_redirectdepth)
 229                      {
 230                          // only follow redirect if it's on this site, or offsiteok is true
 231                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 232                          {
 233                              /* follow the redirect */
 234                              $this->_redirectdepth++;
 235                              $this->lastredirectaddr=$this->_redirectaddr;
 236                              $this->fetch($this->_redirectaddr);
 237                          }
 238                      }
 239                  }
 240  
 241                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 242                  {
 243                      $frameurls = $this->_frameurls;
 244                      $this->_frameurls = array();
 245  
 246                      while(list(,$frameurl) = each($frameurls))
 247                      {
 248                          if($this->_framedepth < $this->maxframes)
 249                          {
 250                              $this->fetch($frameurl);
 251                              $this->_framedepth++;
 252                          }
 253                          else
 254                              break;
 255                      }
 256                  }
 257                  return true;
 258                  break;
 259              default:
 260                  // not a valid protocol
 261                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 262                  return false;
 263                  break;
 264          }
 265          return true;
 266      }
 267  
 268  /*======================================================================*\
 269      Function:    submit
 270      Purpose:    submit an http form
 271      Input:        $URI    the location to post the data
 272                  $formvars    the formvars to use.
 273                      format: $formvars["var"] = "val";
 274                  $formfiles  an array of files to submit
 275                      format: $formfiles["var"] = "/dir/filename.ext";
 276      Output:        $this->results    the text output from the post
 277  \*======================================================================*/
 278  
 279  	function submit($URI, $formvars="", $formfiles="")
 280      {
 281          unset($postdata);
 282  
 283          $postdata = $this->_prepare_post_body($formvars, $formfiles);
 284  
 285          $URI_PARTS = parse_url($URI);
 286          if (!empty($URI_PARTS["user"]))
 287              $this->user = $URI_PARTS["user"];
 288          if (!empty($URI_PARTS["pass"]))
 289              $this->pass = $URI_PARTS["pass"];
 290          if (empty($URI_PARTS["query"]))
 291              $URI_PARTS["query"] = '';
 292          if (empty($URI_PARTS["path"]))
 293              $URI_PARTS["path"] = '';
 294  
 295          switch(strtolower($URI_PARTS["scheme"]))
 296          {
 297              case "http":
 298                  $this->host = $URI_PARTS["host"];
 299                  if(!empty($URI_PARTS["port"]))
 300                      $this->port = $URI_PARTS["port"];
 301                  if($this->_connect($fp))
 302                  {
 303                      if($this->_isproxy)
 304                      {
 305                          // using proxy, send entire URI
 306                          $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
 307                      }
 308                      else
 309                      {
 310                          $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 311                          // no proxy, send only the path
 312                          $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 313                      }
 314  
 315                      $this->_disconnect($fp);
 316  
 317                      if($this->_redirectaddr)
 318                      {
 319                          /* url was redirected, check if we've hit the max depth */
 320                          if($this->maxredirs > $this->_redirectdepth)
 321                          {
 322                              if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 323                                  $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
 324  
 325                              // only follow redirect if it's on this site, or offsiteok is true
 326                              if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 327                              {
 328                                  /* follow the redirect */
 329                                  $this->_redirectdepth++;
 330                                  $this->lastredirectaddr=$this->_redirectaddr;
 331                                  if( strpos( $this->_redirectaddr, "?" ) > 0 )
 332                                      $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 333                                  else
 334                                      $this->submit($this->_redirectaddr,$formvars, $formfiles);
 335                              }
 336                          }
 337                      }
 338  
 339                      if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 340                      {
 341                          $frameurls = $this->_frameurls;
 342                          $this->_frameurls = array();
 343  
 344                          while(list(,$frameurl) = each($frameurls))
 345                          {
 346                              if($this->_framedepth < $this->maxframes)
 347                              {
 348                                  $this->fetch($frameurl);
 349                                  $this->_framedepth++;
 350                              }
 351                              else
 352                                  break;
 353                          }
 354                      }
 355  
 356                  }
 357                  else
 358                  {
 359                      return false;
 360                  }
 361                  return true;
 362                  break;
 363              case "https":
 364                  if(!$this->curl_path)
 365                      return false;
 366                  if(function_exists("is_executable"))
 367                      if (!is_executable($this->curl_path))
 368                          return false;
 369                  $this->host = $URI_PARTS["host"];
 370                  if(!empty($URI_PARTS["port"]))
 371                      $this->port = $URI_PARTS["port"];
 372                  if($this->_isproxy)
 373                  {
 374                      // using proxy, send entire URI
 375                      $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 376                  }
 377                  else
 378                  {
 379                      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
 380                      // no proxy, send only the path
 381                      $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
 382                  }
 383  
 384                  if($this->_redirectaddr)
 385                  {
 386                      /* url was redirected, check if we've hit the max depth */
 387                      if($this->maxredirs > $this->_redirectdepth)
 388                      {
 389                          if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
 390                              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);
 391  
 392                          // only follow redirect if it's on this site, or offsiteok is true
 393                          if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
 394                          {
 395                              /* follow the redirect */
 396                              $this->_redirectdepth++;
 397                              $this->lastredirectaddr=$this->_redirectaddr;
 398                              if( strpos( $this->_redirectaddr, "?" ) > 0 )
 399                                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
 400                              else
 401                                  $this->submit($this->_redirectaddr,$formvars, $formfiles);
 402                          }
 403                      }
 404                  }
 405  
 406                  if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
 407                  {
 408                      $frameurls = $this->_frameurls;
 409                      $this->_frameurls = array();
 410  
 411                      while(list(,$frameurl) = each($frameurls))
 412                      {
 413                          if($this->_framedepth < $this->maxframes)
 414                          {
 415                              $this->fetch($frameurl);
 416                              $this->_framedepth++;
 417                          }
 418                          else
 419                              break;
 420                      }
 421                  }
 422                  return true;
 423                  break;
 424  
 425              default:
 426                  // not a valid protocol
 427                  $this->error    =    'Invalid protocol "'.$URI_PARTS["scheme"].'"\n';
 428                  return false;
 429                  break;
 430          }
 431          return true;
 432      }
 433  
 434  /*======================================================================*\
 435      Function:    fetchlinks
 436      Purpose:    fetch the links from a web page
 437      Input:        $URI    where you are fetching from
 438      Output:        $this->results    an array of the URLs
 439  \*======================================================================*/
 440  
 441  	function fetchlinks($URI)
 442      {
 443          if ($this->fetch($URI))
 444          {
 445              if($this->lastredirectaddr)
 446                  $URI = $this->lastredirectaddr;
 447              if(is_array($this->results))
 448              {
 449                  for($x=0;$x<count($this->results);$x++)
 450                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 451              }
 452              else
 453                  $this->results = $this->_striplinks($this->results);
 454  
 455              if($this->expandlinks)
 456                  $this->results = $this->_expandlinks($this->results, $URI);
 457              return true;
 458          }
 459          else
 460              return false;
 461      }
 462  
 463  /*======================================================================*\
 464      Function:    fetchform
 465      Purpose:    fetch the form elements from a web page
 466      Input:        $URI    where you are fetching from
 467      Output:        $this->results    the resulting html form
 468  \*======================================================================*/
 469  
 470  	function fetchform($URI)
 471      {
 472  
 473          if ($this->fetch($URI))
 474          {
 475  
 476              if(is_array($this->results))
 477              {
 478                  for($x=0;$x<count($this->results);$x++)
 479                      $this->results[$x] = $this->_stripform($this->results[$x]);
 480              }
 481              else
 482                  $this->results = $this->_stripform($this->results);
 483  
 484              return true;
 485          }
 486          else
 487              return false;
 488      }
 489  
 490  
 491  /*======================================================================*\
 492      Function:    fetchtext
 493      Purpose:    fetch the text from a web page, stripping the links
 494      Input:        $URI    where you are fetching from
 495      Output:        $this->results    the text from the web page
 496  \*======================================================================*/
 497  
 498  	function fetchtext($URI)
 499      {
 500          if($this->fetch($URI))
 501          {
 502              if(is_array($this->results))
 503              {
 504                  for($x=0;$x<count($this->results);$x++)
 505                      $this->results[$x] = $this->_striptext($this->results[$x]);
 506              }
 507              else
 508                  $this->results = $this->_striptext($this->results);
 509              return true;
 510          }
 511          else
 512              return false;
 513      }
 514  
 515  /*======================================================================*\
 516      Function:    submitlinks
 517      Purpose:    grab links from a form submission
  518      Input:        $URI    where you are submitting to
 519      Output:        $this->results    an array of the links from the post
 520  \*======================================================================*/
 521  
 522  	function submitlinks($URI, $formvars="", $formfiles="")
 523      {
 524          if($this->submit($URI,$formvars, $formfiles))
 525          {
 526              if($this->lastredirectaddr)
 527                  $URI = $this->lastredirectaddr;
 528              if(is_array($this->results))
 529              {
 530                  for($x=0;$x<count($this->results);$x++)
 531                  {
 532                      $this->results[$x] = $this->_striplinks($this->results[$x]);
 533                      if($this->expandlinks)
 534                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 535                  }
 536              }
 537              else
 538              {
 539                  $this->results = $this->_striplinks($this->results);
 540                  if($this->expandlinks)
 541                      $this->results = $this->_expandlinks($this->results,$URI);
 542              }
 543              return true;
 544          }
 545          else
 546              return false;
 547      }
 548  
 549  /*======================================================================*\
 550      Function:    submittext
 551      Purpose:    grab text from a form submission
  552      Input:        $URI    where you are submitting to
 553      Output:        $this->results    the text from the web page
 554  \*======================================================================*/
 555  
 556  	function submittext($URI, $formvars = "", $formfiles = "")
 557      {
 558          if($this->submit($URI,$formvars, $formfiles))
 559          {
 560              if($this->lastredirectaddr)
 561                  $URI = $this->lastredirectaddr;
 562              if(is_array($this->results))
 563              {
 564                  for($x=0;$x<count($this->results);$x++)
 565                  {
 566                      $this->results[$x] = $this->_striptext($this->results[$x]);
 567                      if($this->expandlinks)
 568                          $this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
 569                  }
 570              }
 571              else
 572              {
 573                  $this->results = $this->_striptext($this->results);
 574                  if($this->expandlinks)
 575                      $this->results = $this->_expandlinks($this->results,$URI);
 576              }
 577              return true;
 578          }
 579          else
 580              return false;
 581      }
 582  
 583  
 584  
 585  /*======================================================================*\
 586      Function:    set_submit_multipart
 587      Purpose:    Set the form submission content type to
 588                  multipart/form-data
 589  \*======================================================================*/
 590  	function set_submit_multipart()
 591      {
 592          $this->_submit_type = "multipart/form-data";
 593      }
 594  
 595  
 596  /*======================================================================*\
 597      Function:    set_submit_normal
 598      Purpose:    Set the form submission content type to
 599                  application/x-www-form-urlencoded
 600  \*======================================================================*/
 601  	function set_submit_normal()
 602      {
 603          $this->_submit_type = "application/x-www-form-urlencoded";
 604      }
 605  
 606  
 607  
 608  
 609  /*======================================================================*\
 610      Private functions
 611  \*======================================================================*/
 612  
 613  
 614  /*======================================================================*\
 615      Function:    _striplinks
 616      Purpose:    strip the hyperlinks from an html document
 617      Input:        $document    document to strip.
 618      Output:        $match        an array of the links
 619  \*======================================================================*/
 620  
 621  	function _striplinks($document)
 622      {
 623          preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
 624                          ([\"\'])?                    # find single or double quote
 625                          (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
 626                                                      # quote, otherwise match up to next space
 627                          'isx",$document,$links);
 628  
 629  
 630          // catenate the non-empty matches from the conditional subpattern
 631  
 632          while(list($key,$val) = each($links[2]))
 633          {
 634              if(!empty($val))
 635                  $match[] = $val;
 636          }
 637  
 638          while(list($key,$val) = each($links[3]))
 639          {
 640              if(!empty($val))
 641                  $match[] = $val;
 642          }
 643  
 644          // return the links
 645          return $match;
 646      }
 647  
 648  /*======================================================================*\
 649      Function:    _stripform
 650      Purpose:    strip the form elements from an html document
 651      Input:        $document    document to strip.
  652      Output:        $match        the matched form elements, joined with CRLF
 653  \*======================================================================*/
 654  
 655  	function _stripform($document)
 656      {
 657          preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);
 658  
 659          // catenate the matches
 660          $match = implode("\r\n",$elements[0]);
 661  
 662          // return the links
 663          return $match;
 664      }
 665  
 666  
 667  
 668  /*======================================================================*\
 669      Function:    _striptext
 670      Purpose:    strip the text from an html document
 671      Input:        $document    document to strip.
 672      Output:        $text        the resulting text
 673  \*======================================================================*/
 674  
 675  	function _striptext($document)
 676      {
 677  
 678          // I didn't use preg eval (//e) since that is only available in PHP 4.0.
 679          // so, list your entities one by one here. I included some of the
 680          // more common ones.
 681  
 682          $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
 683                          "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
 684                          "'([\r\n])[\s]+'",                    // strip out white space
 685                          "'&(quot|#34|#034|#x22);'i",        // replace html entities
 686                          "'&(amp|#38|#038|#x26);'i",            // added hexadecimal values
 687                          "'&(lt|#60|#060|#x3c);'i",
 688                          "'&(gt|#62|#062|#x3e);'i",
 689                          "'&(nbsp|#160|#xa0);'i",
 690                          "'&(iexcl|#161);'i",
 691                          "'&(cent|#162);'i",
 692                          "'&(pound|#163);'i",
 693                          "'&(copy|#169);'i",
 694                          "'&(reg|#174);'i",
 695                          "'&(deg|#176);'i",
 696                          "'&(#39|#039|#x27);'",
 697                          "'&(euro|#8364);'i",                // europe
 698                          "'&a(uml|UML);'",                    // german
 699                          "'&o(uml|UML);'",
 700                          "'&u(uml|UML);'",
 701                          "'&A(uml|UML);'",
 702                          "'&O(uml|UML);'",
 703                          "'&U(uml|UML);'",
 704                          "'&szlig;'i",
 705                          );
 706          $replace = array(    "",
 707                              "",
 708                              "\\1",
 709                              "\"",
 710                              "&",
 711                              "<",
 712                              ">",
 713                              " ",
 714                              chr(161),
 715                              chr(162),
 716                              chr(163),
 717                              chr(169),
 718                              chr(174),
 719                              chr(176),
 720                              chr(39),
 721                              chr(128),
 722                              chr(0xE4), // ANSI &auml;
 723                              chr(0xF6), // ANSI &ouml;
 724                              chr(0xFC), // ANSI &uuml;
 725                              chr(0xC4), // ANSI &Auml;
 726                              chr(0xD6), // ANSI &Ouml;
 727                              chr(0xDC), // ANSI &Uuml;
 728                              chr(0xDF), // ANSI &szlig;
 729                          );
 730  
 731          $text = preg_replace($search,$replace,$document);
 732  
 733          return $text;
 734      }
 735  
 736  /*======================================================================*\
 737      Function:    _expandlinks
 738      Purpose:    expand each link into a fully qualified URL
 739      Input:        $links            the links to qualify
 740                  $URI            the full URI to get the base from
 741      Output:        $expandedLinks    the expanded links
 742  \*======================================================================*/
 743  
 744  	function _expandlinks($links,$URI)
 745      {
 746  
 747          preg_match("/^[^\?]+/",$URI,$match);
 748  
 749          $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
 750          $match = preg_replace("|/$|","",$match);
 751          $match_part = parse_url($match);
 752          $match_root =
 753          $match_part["scheme"]."://".$match_part["host"];
 754  
 755          $search = array(     "|^http://".preg_quote($this->host)."|i",
 756                              "|^(\/)|i",
 757                              "|^(?!http://)(?!mailto:)|i",
 758                              "|/\./|",
 759                              "|/[^\/]+/\.\./|"
 760                          );
 761  
 762          $replace = array(    "",
 763                              $match_root."/",
 764                              $match."/",
 765                              "/",
 766                              "/"
 767                          );
 768  
 769          $expandedLinks = preg_replace($search,$replace,$links);
 770  
 771          return $expandedLinks;
 772      }
 773  
 774  /*======================================================================*\
 775      Function:    _httprequest
 776      Purpose:    go get the http data from the server
 777      Input:        $url        the url to fetch
 778                  $fp            the current open file pointer
 779                  $URI        the full URI
 780                  $body        body contents to send if any (POST)
 781      Output:
 782  \*======================================================================*/
 783  
 784  	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
 785      {
 786          $cookie_headers = '';
 787          if($this->passcookies && $this->_redirectaddr)
 788              $this->setcookies();
 789  
 790          $URI_PARTS = parse_url($URI);
 791          if(empty($url))
 792              $url = "/";
 793          $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
 794          if(!empty($this->agent))
 795              $headers .= "User-Agent: ".$this->agent."\r\n";
 796          if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
 797              $headers .= "Host: ".$this->host;
 798              if(!empty($this->port) && $this->port != 80)
 799                  $headers .= ":".$this->port;
 800              $headers .= "\r\n";
 801          }
 802          if(!empty($this->accept))
 803              $headers .= "Accept: ".$this->accept."\r\n";
 804          if(!empty($this->referer))
 805              $headers .= "Referer: ".$this->referer."\r\n";
 806          if(!empty($this->cookies))
 807          {
 808              if(!is_array($this->cookies))
 809                  $this->cookies = (array)$this->cookies;
 810  
 811              reset($this->cookies);
 812              if ( count($this->cookies) > 0 ) {
 813                  $cookie_headers .= 'Cookie: ';
 814                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 815                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
 816                  }
 817                  $headers .= substr($cookie_headers,0,-2) . "\r\n";
 818              }
 819          }
 820          if(!empty($this->rawheaders))
 821          {
 822              if(!is_array($this->rawheaders))
 823                  $this->rawheaders = (array)$this->rawheaders;
 824              while(list($headerKey,$headerVal) = each($this->rawheaders))
 825                  $headers .= $headerKey.": ".$headerVal."\r\n";
 826          }
 827          if(!empty($content_type)) {
 828              $headers .= "Content-type: $content_type";
 829              if ($content_type == "multipart/form-data")
 830                  $headers .= "; boundary=".$this->_mime_boundary;
 831              $headers .= "\r\n";
 832          }
 833          if(!empty($body))
 834              $headers .= "Content-length: ".strlen($body)."\r\n";
 835          if(!empty($this->user) || !empty($this->pass))
 836              $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
 837  
 838          //add proxy auth headers
 839          if(!empty($this->proxy_user))
 840              $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
 841  
 842  
 843          $headers .= "\r\n";
 844  
 845          // set the read timeout if needed
 846          if ($this->read_timeout > 0)
 847              socket_set_timeout($fp, $this->read_timeout);
 848          $this->timed_out = false;
 849  
 850          fwrite($fp,$headers.$body,strlen($headers.$body));
 851  
 852          $this->_redirectaddr = false;
 853          unset($this->headers);
 854  
 855          while($currentHeader = fgets($fp,$this->_maxlinelen))
 856          {
 857              if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 858              {
 859                  $this->status=-100;
 860                  return false;
 861              }
 862  
 863              if($currentHeader == "\r\n")
 864                  break;
 865  
 866              // if a header begins with Location: or URI:, set the redirect
 867              if(preg_match("/^(Location:|URI:)/i",$currentHeader))
 868              {
 869                  // get URL portion of the redirect
 870                  preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
 871                  // look for :// in the Location header to see if hostname is included
 872                  if(!preg_match("|\:\/\/|",$matches[2]))
 873                  {
 874                      // no host in the path, so prepend
 875                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
 876                      // eliminate double slash
 877                      if(!preg_match("|^/|",$matches[2]))
 878                              $this->_redirectaddr .= "/".$matches[2];
 879                      else
 880                              $this->_redirectaddr .= $matches[2];
 881                  }
 882                  else
 883                      $this->_redirectaddr = $matches[2];
 884              }
 885  
 886              if(preg_match("|^HTTP/|",$currentHeader))
 887              {
 888                  if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
 889                  {
 890                      $this->status= $status[1];
 891                  }
 892                  $this->response_code = $currentHeader;
 893              }
 894  
 895              $this->headers[] = $currentHeader;
 896          }
 897  
 898          $results = '';
 899          do {
 900              $_data = fread($fp, $this->maxlength);
 901              if (strlen($_data) == 0) {
 902                  break;
 903              }
 904              $results .= $_data;
 905          } while(true);
 906  
 907          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
 908          {
 909              $this->status=-100;
 910              return false;
 911          }
 912  
 913          // check if there is a redirect meta tag
 914  
 915          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
 916  
 917          {
 918              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
 919          }
 920  
 921          // have we hit our frame depth and is there frame src to fetch?
 922          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
 923          {
 924              $this->results[] = $results;
 925              for($x=0; $x<count($match[1]); $x++)
 926                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
 927          }
 928          // have we already fetched framed content?
 929          elseif(is_array($this->results))
 930              $this->results[] = $results;
 931          // no framed content
 932          else
 933              $this->results = $results;
 934  
 935          return true;
 936      }
 937  
 938  /*======================================================================*\
 939      Function:    _httpsrequest
 940      Purpose:    go get the https data from the server using curl
 941      Input:        $url        the url to fetch
 942                  $URI        the full URI
 943                  $body        body contents to send if any (POST)
 944      Output:
 945  \*======================================================================*/
 946  
 947  	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
 948      {
 949          if($this->passcookies && $this->_redirectaddr)
 950              $this->setcookies();
 951  
 952          $headers = array();
 953  
 954          $URI_PARTS = parse_url($URI);
 955          if(empty($url))
 956              $url = "/";
 957          // GET ... header not needed for curl
 958          //$headers[] = $http_method." ".$url." ".$this->_httpversion;
 959          if(!empty($this->agent))
 960              $headers[] = "User-Agent: ".$this->agent;
 961          if(!empty($this->host))
 962              if(!empty($this->port))
 963                  $headers[] = "Host: ".$this->host.":".$this->port;
 964              else
 965                  $headers[] = "Host: ".$this->host;
 966          if(!empty($this->accept))
 967              $headers[] = "Accept: ".$this->accept;
 968          if(!empty($this->referer))
 969              $headers[] = "Referer: ".$this->referer;
 970          if(!empty($this->cookies))
 971          {
 972              if(!is_array($this->cookies))
 973                  $this->cookies = (array)$this->cookies;
 974  
 975              reset($this->cookies);
 976              if ( count($this->cookies) > 0 ) {
 977                  $cookie_str = 'Cookie: ';
 978                  foreach ( $this->cookies as $cookieKey => $cookieVal ) {
 979                  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
 980                  }
 981                  $headers[] = substr($cookie_str,0,-2);
 982              }
 983          }
 984          if(!empty($this->rawheaders))
 985          {
 986              if(!is_array($this->rawheaders))
 987                  $this->rawheaders = (array)$this->rawheaders;
 988              while(list($headerKey,$headerVal) = each($this->rawheaders))
 989                  $headers[] = $headerKey.": ".$headerVal;
 990          }
 991          if(!empty($content_type)) {
 992              if ($content_type == "multipart/form-data")
 993                  $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
 994              else
 995                  $headers[] = "Content-type: $content_type";
 996          }
 997          if(!empty($body))
 998              $headers[] = "Content-length: ".strlen($body);
 999          if(!empty($this->user) || !empty($this->pass))
1000              $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
1001  
1002          $headerfile = tempnam( $this->temp_dir, "sno" );
1003          $cmdline_params = '-k -D ' . escapeshellarg( $headerfile );
1004  
1005          foreach ( $headers as $header ) {
1006              $cmdline_params .= ' -H ' . escapeshellarg( $header );
1007          }
1008  
1009          if ( ! empty( $body ) ) {
1010              $cmdline_params .= ' -d ' . escapeshellarg( $body );
1011          }
1012  
1013          if ( $this->read_timeout > 0 ) {
1014              $cmdline_params .= ' -m ' . escapeshellarg( $this->read_timeout );
1015          }
1016  
1017  
1018          exec( $this->curl_path . ' ' . $cmdline_params . ' ' . escapeshellarg( $URI ), $results, $return );
1019  
1020          if($return)
1021          {
1022              $this->error = "Error: cURL could not retrieve the document, error $return.";
1023              return false;
1024          }
1025  
1026  
1027          $results = implode("\r\n",$results);
1028  
1029          $result_headers = file("$headerfile");
1030  
1031          $this->_redirectaddr = false;
1032          unset($this->headers);
1033  
1034          for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
1035          {
1036  
1037              // if a header begins with Location: or URI:, set the redirect
1038              if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
1039              {
1040                  // get URL portion of the redirect
1041                  preg_match("/^(Location: |URI:)\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
1042                  // look for :// in the Location header to see if hostname is included
1043                  if(!preg_match("|\:\/\/|",$matches[2]))
1044                  {
1045                      // no host in the path, so prepend
1046                      $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
1047                      // eliminate double slash
1048                      if(!preg_match("|^/|",$matches[2]))
1049                              $this->_redirectaddr .= "/".$matches[2];
1050                      else
1051                              $this->_redirectaddr .= $matches[2];
1052                  }
1053                  else
1054                      $this->_redirectaddr = $matches[2];
1055              }
1056  
1057              if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
1058                  $this->response_code = $result_headers[$currentHeader];
1059  
1060              $this->headers[] = $result_headers[$currentHeader];
1061          }
1062  
1063          // check if there is a redirect meta tag
1064  
1065          if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
1066          {
1067              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
1068          }
1069  
1070          // have we hit our frame depth and is there frame src to fetch?
1071          if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
1072          {
1073              $this->results[] = $results;
1074              for($x=0; $x<count($match[1]); $x++)
1075                  $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
1076          }
1077          // have we already fetched framed content?
1078          elseif(is_array($this->results))
1079              $this->results[] = $results;
1080          // no framed content
1081          else
1082              $this->results = $results;
1083  
1084          unlink("$headerfile");
1085  
1086          return true;
1087      }
1088  
1089  /*======================================================================*\
1090      Function:    setcookies()
1091      Purpose:    set cookies for a redirection
1092  \*======================================================================*/
1093  
1094  	function setcookies()
1095      {
1096          for($x=0; $x<count($this->headers); $x++)
1097          {
1098          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1099              $this->cookies[$match[1]] = urldecode($match[2]);
1100          }
1101      }
1102  
1103  
1104  /*======================================================================*\
1105      Function:    _check_timeout
1106      Purpose:    checks whether timeout has occurred
1107      Input:        $fp    file pointer
1108  \*======================================================================*/
1109  
1110  	function _check_timeout($fp)
1111      {
1112          if ($this->read_timeout > 0) {
1113              $fp_status = socket_get_status($fp);
1114              if ($fp_status["timed_out"]) {
1115                  $this->timed_out = true;
1116                  return true;
1117              }
1118          }
1119          return false;
1120      }
1121  
1122  /*======================================================================*\
1123      Function:    _connect
1124      Purpose:    make a socket connection
1125      Input:        $fp    file pointer
1126  \*======================================================================*/
1127  
1128  	function _connect(&$fp)
1129      {
1130          if(!empty($this->proxy_host) && !empty($this->proxy_port))
1131              {
1132                  $this->_isproxy = true;
1133  
1134                  $host = $this->proxy_host;
1135                  $port = $this->proxy_port;
1136              }
1137          else
1138          {
1139              $host = $this->host;
1140              $port = $this->port;
1141          }
1142  
1143          $this->status = 0;
1144  
1145          if($fp = fsockopen(
1146                      $host,
1147                      $port,
1148                      $errno,
1149                      $errstr,
1150                      $this->_fp_timeout
1151                      ))
1152          {
1153              // socket connection succeeded
1154  
1155              return true;
1156          }
1157          else
1158          {
1159              // socket connection failed
1160              $this->status = $errno;
1161              switch($errno)
1162              {
1163                  case -3:
1164                      $this->error="socket creation failed (-3)";
1165                  case -4:
1166                      $this->error="dns lookup failure (-4)";
1167                  case -5:
1168                      $this->error="connection refused or timed out (-5)";
1169                  default:
1170                      $this->error="connection failed (".$errno.")";
1171              }
1172              return false;
1173          }
1174      }
1175  /*======================================================================*\
1176      Function:    _disconnect
1177      Purpose:    disconnect a socket connection
1178      Input:        $fp    file pointer
1179  \*======================================================================*/
1180  
1181  	function _disconnect($fp)
1182      {
1183          return(fclose($fp));
1184      }
1185  
1186  
1187  /*======================================================================*\
1188      Function:    _prepare_post_body
1189      Purpose:    Prepare post body according to encoding type
1190      Input:        $formvars  - form variables
1191                  $formfiles - form upload files
1192      Output:        post body
1193  \*======================================================================*/
1194  
1195  	function _prepare_post_body($formvars, $formfiles)
1196      {
1197          settype($formvars, "array");
1198          settype($formfiles, "array");
1199          $postdata = '';
1200  
1201          if (count($formvars) == 0 && count($formfiles) == 0)
1202              return;
1203  
1204          switch ($this->_submit_type) {
1205              case "application/x-www-form-urlencoded":
1206                  reset($formvars);
1207                  while(list($key,$val) = each($formvars)) {
1208                      if (is_array($val) || is_object($val)) {
1209                          while (list($cur_key, $cur_val) = each($val)) {
1210                              $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1211                          }
1212                      } else
1213                          $postdata .= urlencode($key)."=".urlencode($val)."&";
1214                  }
1215                  break;
1216  
1217              case "multipart/form-data":
1218                  $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1219  
1220                  reset($formvars);
1221                  while(list($key,$val) = each($formvars)) {
1222                      if (is_array($val) || is_object($val)) {
1223                          while (list($cur_key, $cur_val) = each($val)) {
1224                              $postdata .= "--".$this->_mime_boundary."\r\n";
1225                              $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1226                              $postdata .= "$cur_val\r\n";
1227                          }
1228                      } else {
1229                          $postdata .= "--".$this->_mime_boundary."\r\n";
1230                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1231                          $postdata .= "$val\r\n";
1232                      }
1233                  }
1234  
1235                  reset($formfiles);
1236                  while (list($field_name, $file_names) = each($formfiles)) {
1237                      settype($file_names, "array");
1238                      while (list(, $file_name) = each($file_names)) {
1239                          if (!is_readable($file_name)) continue;
1240  
1241                          $fp = fopen($file_name, "r");
1242                          $file_content = fread($fp, filesize($file_name));
1243                          fclose($fp);
1244                          $base_name = basename($file_name);
1245  
1246                          $postdata .= "--".$this->_mime_boundary."\r\n";
1247                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1248                          $postdata .= "$file_content\r\n";
1249                      }
1250                  }
1251                  $postdata .= "--".$this->_mime_boundary."--\r\n";
1252                  break;
1253          }
1254  
1255          return $postdata;
1256      }
1257  }
1258  endif;
1259  ?>


Generated: Fri Oct 25 08:20:01 2019 Cross-referenced by PHPXref 0.7