[ Index ]

PHP Cross Reference of WordPress Trunk (Updated Daily)

Search

title

Body

[close]

/wp-includes/SimplePie/src/ -> Locator.php (source)

   1  <?php
   2  
   3  // SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
   4  // SPDX-License-Identifier: BSD-3-Clause
   5  
   6  declare(strict_types=1);
   7  
   8  namespace SimplePie;
   9  
  10  use DomDocument;
  11  use Psr\Http\Client\ClientInterface;
  12  use Psr\Http\Message\RequestFactoryInterface;
  13  use Psr\Http\Message\UriFactoryInterface;
  14  use SimplePie\HTTP\Client;
  15  use SimplePie\HTTP\ClientException;
  16  use SimplePie\HTTP\FileClient;
  17  use SimplePie\HTTP\Psr18Client;
  18  use SimplePie\HTTP\Response;
  19  
  20  /**
  21   * Used for feed auto-discovery
  22   *
  23   *
  24   * This class can be overloaded with {@see \SimplePie\SimplePie::set_locator_class()}
  25   */
  26  class Locator implements RegistryAware
  27  {
  28      /** @var ?string */
  29      public $useragent = null;
  30      /** @var int */
  31      public $timeout = 10;
  32      /** @var File */
  33      public $file;
  34      /** @var string[] */
  35      public $local = [];
  36      /** @var string[] */
  37      public $elsewhere = [];
  38      /** @var array<mixed> */
  39      public $cached_entities = [];
  40      /** @var string */
  41      public $http_base;
  42      /** @var string */
  43      public $base;
  44      /** @var int */
  45      public $base_location = 0;
  46      /** @var int */
  47      public $checked_feeds = 0;
  48      /** @var int */
  49      public $max_checked_feeds = 10;
  50      /** @var bool */
  51      public $force_fsockopen = false;
  52      /** @var array<int, mixed> */
  53      public $curl_options = [];
  54      /** @var ?\DomDocument */
  55      public $dom;
  56      /** @var ?Registry */
  57      protected $registry;
  58  
  59      /**
  60       * @var Client|null
  61       */
  62      private $http_client = null;
  63  
  64      /**
  65       * @param array<int, mixed> $curl_options
  66       */
  67      public function __construct(File $file, int $timeout = 10, ?string $useragent = null, int $max_checked_feeds = 10, bool $force_fsockopen = false, array $curl_options = [])
  68      {
  69          $this->file = $file;
  70          $this->useragent = $useragent;
  71          $this->timeout = $timeout;
  72          $this->max_checked_feeds = $max_checked_feeds;
  73          $this->force_fsockopen = $force_fsockopen;
  74          $this->curl_options = $curl_options;
  75  
  76          $body = $this->file->get_body_content();
  77  
  78          if (class_exists('DOMDocument') && $body != '') {
  79              $this->dom = new \DOMDocument();
  80  
  81              set_error_handler([Misc::class, 'silence_errors']);
  82              try {
  83                  $this->dom->loadHTML($body);
  84              } catch (\Throwable $ex) {
  85                  $this->dom = null;
  86              }
  87              restore_error_handler();
  88          } else {
  89              $this->dom = null;
  90          }
  91      }
  92  
  93      /**
  94       * Set a PSR-18 client and PSR-17 factories
  95       *
  96       * Allows you to use your own HTTP client implementations.
  97       */
  98      final public function set_http_client(
  99          ClientInterface $http_client,
 100          RequestFactoryInterface $request_factory,
 101          UriFactoryInterface $uri_factory
 102      ): void {
 103          $this->http_client = new Psr18Client($http_client, $request_factory, $uri_factory);
 104      }
 105  
 106      /**
 107       * @return void
 108       */
 109      public function set_registry(\SimplePie\Registry $registry)
 110      {
 111          $this->registry = $registry;
 112      }
 113  
 114      /**
 115       * @param SimplePie::LOCATOR_* $type
 116       * @param array<Response>|null $working
 117       * @return Response|null
 118       */
 119      public function find(int $type = \SimplePie\SimplePie::LOCATOR_ALL, ?array &$working = null)
 120      {
 121          assert($this->registry !== null);
 122  
 123          if ($this->is_feed($this->file)) {
 124              return $this->file;
 125          }
 126  
 127          if (Misc::is_remote_uri($this->file->get_final_requested_uri())) {
 128              $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$this->file]);
 129              if ($sniffer->get_type() !== 'text/html') {
 130                  return null;
 131              }
 132          }
 133  
 134          if ($type & ~\SimplePie\SimplePie::LOCATOR_NONE) {
 135              $this->get_base();
 136          }
 137  
 138          if ($type & \SimplePie\SimplePie::LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery()) {
 139              return $working[0];
 140          }
 141  
 142          if ($type & (\SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION | \SimplePie\SimplePie::LOCATOR_LOCAL_BODY | \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION | \SimplePie\SimplePie::LOCATOR_REMOTE_BODY) && $this->get_links()) {
 143              if ($type & \SimplePie\SimplePie::LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local)) {
 144                  return $working[0];
 145              }
 146  
 147              if ($type & \SimplePie\SimplePie::LOCATOR_LOCAL_BODY && $working = $this->body($this->local)) {
 148                  return $working[0];
 149              }
 150  
 151              if ($type & \SimplePie\SimplePie::LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere)) {
 152                  return $working[0];
 153              }
 154  
 155              if ($type & \SimplePie\SimplePie::LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere)) {
 156                  return $working[0];
 157              }
 158          }
 159          return null;
 160      }
 161  
 162      /**
 163       * @return bool
 164       */
 165      public function is_feed(Response $file, bool $check_html = false)
 166      {
 167          assert($this->registry !== null);
 168  
 169          if (Misc::is_remote_uri($file->get_final_requested_uri())) {
 170              $sniffer = $this->registry->create(Content\Type\Sniffer::class, [$file]);
 171              $sniffed = $sniffer->get_type();
 172              $mime_types = ['application/rss+xml', 'application/rdf+xml',
 173                                  'text/rdf', 'application/atom+xml', 'text/xml',
 174                                  'application/xml', 'application/x-rss+xml'];
 175              if ($check_html) {
 176                  $mime_types[] = 'text/html';
 177              }
 178  
 179              return in_array($sniffed, $mime_types);
 180          } elseif (is_file($file->get_final_requested_uri())) {
 181              return true;
 182          } else {
 183              return false;
 184          }
 185      }
 186  
 187      /**
 188       * @return void
 189       */
 190      public function get_base()
 191      {
 192          assert($this->registry !== null);
 193  
 194          if ($this->dom === null) {
 195              throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
 196          }
 197          $this->http_base = $this->file->get_final_requested_uri();
 198          $this->base = $this->http_base;
 199          $elements = $this->dom->getElementsByTagName('base');
 200          foreach ($elements as $element) {
 201              if ($element->hasAttribute('href')) {
 202                  $base = $this->registry->call(Misc::class, 'absolutize_url', [trim($element->getAttribute('href')), $this->http_base]);
 203                  if ($base === false) {
 204                      continue;
 205                  }
 206                  $this->base = $base;
 207                  $this->base_location = method_exists($element, 'getLineNo') ? $element->getLineNo() : 0;
 208                  break;
 209              }
 210          }
 211      }
 212  
 213      /**
 214       * @return array<Response>|null
 215       */
 216      public function autodiscovery()
 217      {
 218          $done = [];
 219          $feeds = [];
 220          $feeds = array_merge($feeds, $this->search_elements_by_tag('link', $done, $feeds));
 221          $feeds = array_merge($feeds, $this->search_elements_by_tag('a', $done, $feeds));
 222          $feeds = array_merge($feeds, $this->search_elements_by_tag('area', $done, $feeds));
 223  
 224          if (!empty($feeds)) {
 225              return array_values($feeds);
 226          }
 227  
 228          return null;
 229      }
 230  
 231      /**
 232       * @param string[] $done
 233       * @param array<string, Response> $feeds
 234       * @return array<string, Response>
 235       */
 236      protected function search_elements_by_tag(string $name, array &$done, array $feeds)
 237      {
 238          assert($this->registry !== null);
 239  
 240          if ($this->dom === null) {
 241              throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
 242          }
 243  
 244          $links = $this->dom->getElementsByTagName($name);
 245          foreach ($links as $link) {
 246              if ($this->checked_feeds === $this->max_checked_feeds) {
 247                  break;
 248              }
 249              if ($link->hasAttribute('href') && $link->hasAttribute('rel')) {
 250                  $rel = array_unique($this->registry->call(Misc::class, 'space_separated_tokens', [strtolower($link->getAttribute('rel'))]));
 251                  $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
 252  
 253                  if ($this->base_location < $line) {
 254                      $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->base]);
 255                  } else {
 256                      $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->http_base]);
 257                  }
 258                  if ($href === false) {
 259                      continue;
 260                  }
 261  
 262                  if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call(Misc::class, 'parse_mime', [$link->getAttribute('type')])), ['text/html', 'application/rss+xml', 'application/atom+xml'])) && !isset($feeds[$href])) {
 263                      $this->checked_feeds++;
 264                      $headers = [
 265                          'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
 266                      ];
 267  
 268                      try {
 269                          $feed = $this->get_http_client()->request(Client::METHOD_GET, $href, $headers);
 270  
 271                          if ((!Misc::is_remote_uri($feed->get_final_requested_uri()) || ($feed->get_status_code() === 200 || $feed->get_status_code() > 206 && $feed->get_status_code() < 300)) && $this->is_feed($feed, true)) {
 272                              $feeds[$href] = $feed;
 273                          }
 274                      } catch (ClientException $th) {
 275                          // Just mark it as done and continue.
 276                      }
 277                  }
 278                  $done[] = $href;
 279              }
 280          }
 281  
 282          return $feeds;
 283      }
 284  
 285      /**
 286       * @return true|null
 287       */
 288      public function get_links()
 289      {
 290          assert($this->registry !== null);
 291  
 292          if ($this->dom === null) {
 293              throw new \SimplePie\Exception('DOMDocument not found, unable to use locator');
 294          }
 295  
 296          $links = $this->dom->getElementsByTagName('a');
 297          foreach ($links as $link) {
 298              if ($link->hasAttribute('href')) {
 299                  $href = trim($link->getAttribute('href'));
 300                  $parsed = $this->registry->call(Misc::class, 'parse_url', [$href]);
 301                  if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) {
 302                      if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) {
 303                          $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->base]);
 304                      } else {
 305                          $href = $this->registry->call(Misc::class, 'absolutize_url', [trim($link->getAttribute('href')), $this->http_base]);
 306                      }
 307                      if ($href === false) {
 308                          continue;
 309                      }
 310  
 311                      $current = $this->registry->call(Misc::class, 'parse_url', [$this->file->get_final_requested_uri()]);
 312  
 313                      if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority']) {
 314                          $this->local[] = $href;
 315                      } else {
 316                          $this->elsewhere[] = $href;
 317                      }
 318                  }
 319              }
 320          }
 321          $this->local = array_unique($this->local);
 322          $this->elsewhere = array_unique($this->elsewhere);
 323          if (!empty($this->local) || !empty($this->elsewhere)) {
 324              return true;
 325          }
 326          return null;
 327      }
 328  
 329      /**
 330       * Extracts first `link` element with given `rel` attribute inside the `head` element.
 331       *
 332       * @return string|null
 333       */
 334      public function get_rel_link(string $rel)
 335      {
 336          assert($this->registry !== null);
 337  
 338          if ($this->dom === null) {
 339              throw new \SimplePie\Exception('DOMDocument not found, unable to use '.
 340                                            'locator');
 341          }
 342          if (!class_exists('DOMXpath')) {
 343              throw new \SimplePie\Exception('DOMXpath not found, unable to use '.
 344                                            'get_rel_link');
 345          }
 346  
 347          $xpath = new \DOMXpath($this->dom);
 348          $query = '(//head)[1]/link[@rel and @href]';
 349          /** @var \DOMNodeList<\DOMElement> */
 350          $queryResult = $xpath->query($query);
 351          foreach ($queryResult as $link) {
 352              $href = trim($link->getAttribute('href'));
 353              $parsed = $this->registry->call(Misc::class, 'parse_url', [$href]);
 354              if ($parsed['scheme'] === '' ||
 355                  preg_match('/^https?$/i', $parsed['scheme'])) {
 356                  if (method_exists($link, 'getLineNo') &&
 357                      $this->base_location < $link->getLineNo()) {
 358                      $href = $this->registry->call(
 359                          Misc::class,
 360                          'absolutize_url',
 361                          [trim($link->getAttribute('href')), $this->base]
 362                      );
 363                  } else {
 364                      $href = $this->registry->call(
 365                          Misc::class,
 366                          'absolutize_url',
 367                          [trim($link->getAttribute('href')), $this->http_base]
 368                      );
 369                  }
 370                  if ($href === false) {
 371                      return null;
 372                  }
 373                  $rel_values = explode(' ', strtolower($link->getAttribute('rel')));
 374                  if (in_array($rel, $rel_values)) {
 375                      return $href;
 376                  }
 377              }
 378          }
 379  
 380          return null;
 381      }
 382  
 383      /**
 384       * @param string[] $array
 385       * @return array<Response>|null
 386       */
 387      public function extension(array &$array)
 388      {
 389          foreach ($array as $key => $value) {
 390              if ($this->checked_feeds === $this->max_checked_feeds) {
 391                  break;
 392              }
 393              $extension = strrchr($value, '.');
 394              if ($extension !== false && in_array(strtolower($extension), ['.rss', '.rdf', '.atom', '.xml'])) {
 395                  $this->checked_feeds++;
 396  
 397                  $headers = [
 398                      'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
 399                  ];
 400  
 401                  try {
 402                      $feed = $this->get_http_client()->request(Client::METHOD_GET, $value, $headers);
 403  
 404                      if ((!Misc::is_remote_uri($feed->get_final_requested_uri()) || ($feed->get_status_code() === 200 || $feed->get_status_code() > 206 && $feed->get_status_code() < 300)) && $this->is_feed($feed)) {
 405                          return [$feed];
 406                      }
 407                  } catch (ClientException $th) {
 408                      // Just unset and continue.
 409                  }
 410  
 411                  unset($array[$key]);
 412              }
 413          }
 414          return null;
 415      }
 416  
 417      /**
 418       * @param string[] $array
 419       * @return array<Response>|null
 420       */
 421      public function body(array &$array)
 422      {
 423          foreach ($array as $key => $value) {
 424              if ($this->checked_feeds === $this->max_checked_feeds) {
 425                  break;
 426              }
 427              if (preg_match('/(feed|rss|rdf|atom|xml)/i', $value)) {
 428                  $this->checked_feeds++;
 429                  $headers = [
 430                      'Accept' => SimplePie::DEFAULT_HTTP_ACCEPT_HEADER,
 431                  ];
 432  
 433                  try {
 434                      $feed = $this->get_http_client()->request(Client::METHOD_GET, $value, $headers);
 435  
 436                      if ((!Misc::is_remote_uri($feed->get_final_requested_uri()) || ($feed->get_status_code() === 200 || $feed->get_status_code() > 206 && $feed->get_status_code() < 300)) && $this->is_feed($feed)) {
 437                          return [$feed];
 438                      }
 439                  } catch (ClientException $th) {
 440                      // Just unset and continue.
 441                  }
 442  
 443                  unset($array[$key]);
 444              }
 445          }
 446          return null;
 447      }
 448  
 449      /**
 450       * Get a HTTP client
 451       */
 452      private function get_http_client(): Client
 453      {
 454          assert($this->registry !== null);
 455  
 456          if ($this->http_client === null) {
 457              $options = [
 458                  'timeout' => $this->timeout,
 459                  'redirects' => 5,
 460                  'force_fsockopen' => $this->force_fsockopen,
 461                  'curl_options' => $this->curl_options,
 462              ];
 463  
 464              if ($this->useragent !== null) {
 465                  $options['useragent'] = $this->useragent;
 466              }
 467  
 468              return new FileClient(
 469                  $this->registry,
 470                  $options
 471              );
 472          }
 473  
 474          return $this->http_client;
 475      }
 476  }
 477  
 478  class_alias('SimplePie\Locator', 'SimplePie_Locator', false);


Generated : Wed Sep 17 08:20:04 2025 Cross-referenced by PHPXref