[ Index ]

PHP Cross Reference of WordPress Trunk (Updated Daily)

Search

title

Body

[close]

/wp-includes/SimplePie/src/ -> Sanitize.php (source)

   1  <?php
   2  
   3  // SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
   4  // SPDX-License-Identifier: BSD-3-Clause
   5  
   6  declare(strict_types=1);
   7  
   8  namespace SimplePie;
   9  
  10  use DOMDocument;
  11  use DOMXPath;
  12  use InvalidArgumentException;
  13  use Psr\Http\Client\ClientInterface;
  14  use Psr\Http\Message\RequestFactoryInterface;
  15  use Psr\Http\Message\UriFactoryInterface;
  16  use SimplePie\Cache\Base;
  17  use SimplePie\Cache\BaseDataCache;
  18  use SimplePie\Cache\CallableNameFilter;
  19  use SimplePie\Cache\DataCache;
  20  use SimplePie\Cache\NameFilter;
  21  use SimplePie\HTTP\Client;
  22  use SimplePie\HTTP\ClientException;
  23  use SimplePie\HTTP\FileClient;
  24  use SimplePie\HTTP\Psr18Client;
  25  
  26  /**
  27   * Used for data cleanup and post-processing
  28   *
  29   *
  30   * This class can be overloaded with {@see \SimplePie\SimplePie::set_sanitize_class()}
  31   *
  32   * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
  33   */
  34  class Sanitize implements RegistryAware
  35  {
  36      // Private vars
  37      /** @var string */
  38      public $base = '';
  39  
  40      // Options
  41      /** @var bool */
  42      public $remove_div = true;
  43      /** @var string */
  44      public $image_handler = '';
  45      /** @var string[] */
  46      public $strip_htmltags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'];
  47      /** @var bool */
  48      public $encode_instead_of_strip = false;
  49      /** @var string[] */
  50      public $strip_attributes = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'];
  51      /** @var string[] */
  52      public $rename_attributes = [];
  53      /** @var array<string, array<string, string>> */
  54      public $add_attributes = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']];
  55      /** @var bool */
  56      public $strip_comments = false;
  57      /** @var string */
  58      public $output_encoding = 'UTF-8';
  59      /** @var bool */
  60      public $enable_cache = true;
  61      /** @var string */
  62      public $cache_location = './cache';
  63      /** @var string&(callable(string): string) */
  64      public $cache_name_function = 'md5';
  65  
  66      /**
  67       * @var NameFilter
  68       */
  69      private $cache_namefilter;
  70      /** @var int */
  71      public $timeout = 10;
  72      /** @var string */
  73      public $useragent = '';
  74      /** @var bool */
  75      public $force_fsockopen = false;
  76      /** @var array<string, string|string[]> */
  77      public $replace_url_attributes = [];
  78      /**
  79       * @var array<int, mixed> Custom curl options
  80       * @see SimplePie::set_curl_options()
  81       */
  82      private $curl_options = [];
  83  
  84      /** @var Registry */
  85      public $registry;
  86  
  87      /**
  88       * @var DataCache|null
  89       */
  90      private $cache = null;
  91  
  92      /**
  93       * @var int Cache duration (in seconds)
  94       */
  95      private $cache_duration = 3600;
  96  
  97      /**
  98       * List of domains for which to force HTTPS.
  99       * @see \SimplePie\Sanitize::set_https_domains()
 100       * Array is a tree split at DNS levels. Example:
 101       * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true)))
 102       * @var true|array<string, true|array<string, true|array<string, array<string, true|array<string, true|array<string, true>>>>>>
 103       */
 104      public $https_domains = [];
 105  
 106      /**
 107       * @var Client|null
 108       */
 109      private $http_client = null;
 110  
 111      public function __construct()
 112      {
 113          // Set defaults
 114          $this->set_url_replacements(null);
 115      }
 116  
 117      /**
 118       * @return void
 119       */
 120      public function remove_div(bool $enable = true)
 121      {
 122          $this->remove_div = (bool) $enable;
 123      }
 124  
 125      /**
 126       * @param string|false $page
 127       * @return void
 128       */
 129      public function set_image_handler($page = false)
 130      {
 131          if ($page) {
 132              $this->image_handler = (string) $page;
 133          } else {
 134              $this->image_handler = '';
 135          }
 136      }
 137  
 138      /**
 139       * @return void
 140       */
 141      public function set_registry(\SimplePie\Registry $registry)
 142      {
 143          $this->registry = $registry;
 144      }
 145  
 146      /**
 147       * @param (string&(callable(string): string))|NameFilter $cache_name_function
 148       * @param class-string<Cache> $cache_class
 149       * @return void
 150       */
 151      public function pass_cache_data(bool $enable_cache = true, string $cache_location = './cache', $cache_name_function = 'md5', string $cache_class = Cache::class, ?DataCache $cache = null)
 152      {
 153          $this->enable_cache = $enable_cache;
 154  
 155          if ($cache_location) {
 156              $this->cache_location = $cache_location;
 157          }
 158  
 159          // @phpstan-ignore-next-line Enforce PHPDoc type.
 160          if (!is_string($cache_name_function) && !$cache_name_function instanceof NameFilter) {
 161              throw new InvalidArgumentException(sprintf(
 162                  '%s(): Argument #3 ($cache_name_function) must be of type %s',
 163                  __METHOD__,
 164                  NameFilter::class
 165              ), 1);
 166          }
 167  
 168          // BC: $cache_name_function could be a callable as string
 169          if (is_string($cache_name_function)) {
 170              // trigger_error(sprintf('Providing $cache_name_function as string in "%s()" is deprecated since SimplePie 1.8.0, provide as "%s" instead.', __METHOD__, NameFilter::class), \E_USER_DEPRECATED);
 171              $this->cache_name_function = $cache_name_function;
 172  
 173              $cache_name_function = new CallableNameFilter($cache_name_function);
 174          }
 175  
 176          $this->cache_namefilter = $cache_name_function;
 177  
 178          if ($cache !== null) {
 179              $this->cache = $cache;
 180          }
 181      }
 182  
 183      /**
 184       * Set a PSR-18 client and PSR-17 factories
 185       *
 186       * Allows you to use your own HTTP client implementations.
 187       */
 188      final public function set_http_client(
 189          ClientInterface $http_client,
 190          RequestFactoryInterface $request_factory,
 191          UriFactoryInterface $uri_factory
 192      ): void {
 193          $this->http_client = new Psr18Client($http_client, $request_factory, $uri_factory);
 194      }
 195  
 196      /**
 197       * @deprecated since SimplePie 1.9.0, use \SimplePie\Sanitize::set_http_client() instead.
 198       * @param class-string<File> $file_class
 199       * @param array<int, mixed> $curl_options
 200       * @return void
 201       */
 202      public function pass_file_data(string $file_class = File::class, int $timeout = 10, string $useragent = '', bool $force_fsockopen = false, array $curl_options = [])
 203      {
 204          // trigger_error(sprintf('SimplePie\Sanitize::pass_file_data() is deprecated since SimplePie 1.9.0, please use "SimplePie\Sanitize::set_http_client()" instead.'), \E_USER_DEPRECATED);
 205          if ($timeout) {
 206              $this->timeout = $timeout;
 207          }
 208  
 209          if ($useragent) {
 210              $this->useragent = $useragent;
 211          }
 212  
 213          if ($force_fsockopen) {
 214              $this->force_fsockopen = $force_fsockopen;
 215          }
 216  
 217          $this->curl_options = $curl_options;
 218          // Invalidate the registered client.
 219          $this->http_client = null;
 220      }
 221  
 222      /**
 223       * @param string[]|string|false $tags Set a list of tags to strip, or set empty string to use default tags, or false to strip nothing.
 224       * @return void
 225       */
 226      public function strip_htmltags($tags = ['base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'])
 227      {
 228          if ($tags) {
 229              if (is_array($tags)) {
 230                  $this->strip_htmltags = $tags;
 231              } else {
 232                  $this->strip_htmltags = explode(',', $tags);
 233              }
 234          } else {
 235              $this->strip_htmltags = [];
 236          }
 237      }
 238  
 239      /**
 240       * @return void
 241       */
 242      public function encode_instead_of_strip(bool $encode = false)
 243      {
 244          $this->encode_instead_of_strip = $encode;
 245      }
 246  
 247      /**
 248       * @param string[]|string $attribs
 249       * @return void
 250       */
 251      public function rename_attributes($attribs = [])
 252      {
 253          if ($attribs) {
 254              if (is_array($attribs)) {
 255                  $this->rename_attributes = $attribs;
 256              } else {
 257                  $this->rename_attributes = explode(',', $attribs);
 258              }
 259          } else {
 260              $this->rename_attributes = [];
 261          }
 262      }
 263  
 264      /**
 265       * @param string[]|string $attribs
 266       * @return void
 267       */
 268      public function strip_attributes($attribs = ['bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'])
 269      {
 270          if ($attribs) {
 271              if (is_array($attribs)) {
 272                  $this->strip_attributes = $attribs;
 273              } else {
 274                  $this->strip_attributes = explode(',', $attribs);
 275              }
 276          } else {
 277              $this->strip_attributes = [];
 278          }
 279      }
 280  
 281      /**
 282       * @param array<string, array<string, string>> $attribs
 283       * @return void
 284       */
 285      public function add_attributes(array $attribs = ['audio' => ['preload' => 'none'], 'iframe' => ['sandbox' => 'allow-scripts allow-same-origin'], 'video' => ['preload' => 'none']])
 286      {
 287          $this->add_attributes = $attribs;
 288      }
 289  
 290      /**
 291       * @return void
 292       */
 293      public function strip_comments(bool $strip = false)
 294      {
 295          $this->strip_comments = $strip;
 296      }
 297  
 298      /**
 299       * @return void
 300       */
 301      public function set_output_encoding(string $encoding = 'UTF-8')
 302      {
 303          $this->output_encoding = $encoding;
 304      }
 305  
 306      /**
 307       * Set element/attribute key/value pairs of HTML attributes
 308       * containing URLs that need to be resolved relative to the feed
 309       *
 310       * Defaults to |a|@href, |area|@href, |audio|@src, |blockquote|@cite,
 311       * |del|@cite, |form|@action, |img|@longdesc, |img|@src, |input|@src,
 312       * |ins|@cite, |q|@cite, |source|@src, |video|@src
 313       *
 314       * @since 1.0
 315       * @param array<string, string|string[]>|null $element_attribute Element/attribute key/value pairs, null for default
 316       * @return void
 317       */
 318      public function set_url_replacements(?array $element_attribute = null)
 319      {
 320          if ($element_attribute === null) {
 321              $element_attribute = [
 322                  'a' => 'href',
 323                  'area' => 'href',
 324                  'audio' => 'src',
 325                  'blockquote' => 'cite',
 326                  'del' => 'cite',
 327                  'form' => 'action',
 328                  'img' => [
 329                      'longdesc',
 330                      'src'
 331                  ],
 332                  'input' => 'src',
 333                  'ins' => 'cite',
 334                  'q' => 'cite',
 335                  'source' => 'src',
 336                  'video' => [
 337                      'poster',
 338                      'src'
 339                  ]
 340              ];
 341          }
 342          $this->replace_url_attributes = $element_attribute;
 343      }
 344  
 345      /**
 346       * Set the list of domains for which to force HTTPS.
 347       * @see \SimplePie\Misc::https_url()
 348       * Example array('biz', 'example.com', 'example.org', 'www.example.net');
 349       *
 350       * @param string[] $domains list of domain names ['biz', 'example.com', 'example.org', 'www.example.net']
 351       *
 352       * @return void
 353       */
 354      public function set_https_domains(array $domains)
 355      {
 356          $this->https_domains = [];
 357          foreach ($domains as $domain) {
 358              $domain = trim($domain, ". \t\n\r\0\x0B");
 359              $segments = array_reverse(explode('.', $domain));
 360              /** @var true|array<string, true|array<string, true|array<string, array<string, true|array<string, true|array<string, true>>>>>> */ // Needed for PHPStan.
 361              $node = &$this->https_domains;
 362              foreach ($segments as $segment) {//Build a tree
 363                  if ($node === true) {
 364                      break;
 365                  }
 366                  if (!isset($node[$segment])) {
 367                      $node[$segment] = [];
 368                  }
 369                  $node = &$node[$segment];
 370              }
 371              $node = true;
 372          }
 373      }
 374  
 375      /**
 376       * Check if the domain is in the list of forced HTTPS.
 377       *
 378       * @return bool
 379       */
 380      protected function is_https_domain(string $domain)
 381      {
 382          $domain = trim($domain, '. ');
 383          $segments = array_reverse(explode('.', $domain));
 384          $node = &$this->https_domains;
 385          foreach ($segments as $segment) {//Explore the tree
 386              if (isset($node[$segment])) {
 387                  $node = &$node[$segment];
 388              } else {
 389                  break;
 390              }
 391          }
 392          return $node === true;
 393      }
 394  
 395      /**
 396       * Force HTTPS for selected Web sites.
 397       *
 398       * @return string
 399       */
 400      public function https_url(string $url)
 401      {
 402          return (
 403              strtolower(substr($url, 0, 7)) === 'http://'
 404              && ($parsed = parse_url($url, PHP_URL_HOST)) !== false // Malformed URL
 405              && $parsed !== null // Missing host
 406              && $this->is_https_domain($parsed) // Should be forced?
 407          ) ? substr_replace($url, 's', 4, 0) // Add the 's' to HTTPS
 408          : $url;
 409      }
 410  
 411      /**
 412       * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
 413       * @param string $base
 414       * @return string Sanitized data; false if output encoding is changed to something other than UTF-8 and conversion fails
 415       */
 416      public function sanitize(string $data, int $type, string $base = '')
 417      {
 418          $data = trim($data);
 419          if ($data !== '' || $type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
 420              if ($type & \SimplePie\SimplePie::CONSTRUCT_MAYBE_HTML) {
 421                  if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . \SimplePie\SimplePie::PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
 422                      $type |= \SimplePie\SimplePie::CONSTRUCT_HTML;
 423                  } else {
 424                      $type |= \SimplePie\SimplePie::CONSTRUCT_TEXT;
 425                  }
 426              }
 427  
 428              if ($type & \SimplePie\SimplePie::CONSTRUCT_BASE64) {
 429                  $data = base64_decode($data);
 430              }
 431  
 432              if ($type & (\SimplePie\SimplePie::CONSTRUCT_HTML | \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
 433                  if (!class_exists('DOMDocument')) {
 434                      throw new \SimplePie\Exception('DOMDocument not found, unable to use sanitizer');
 435                  }
 436                  $document = new \DOMDocument();
 437                  $document->encoding = 'UTF-8';
 438  
 439                  // PHPStan seems to have trouble resolving int-mask because bitwise
 440                  // operators are used when operators are used when passing this parameter.
 441                  // https://github.com/phpstan/phpstan/issues/9384
 442                  /** @var int-mask-of<SimplePie::CONSTRUCT_*> $type */
 443                  $data = $this->preprocess($data, $type);
 444  
 445                  set_error_handler([Misc::class, 'silence_errors']);
 446                  $document->loadHTML($data);
 447                  restore_error_handler();
 448  
 449                  $xpath = new \DOMXPath($document);
 450  
 451                  // Strip comments
 452                  if ($this->strip_comments) {
 453                      /** @var \DOMNodeList<\DOMComment> */
 454                      $comments = $xpath->query('//comment()');
 455  
 456                      foreach ($comments as $comment) {
 457                          $parentNode = $comment->parentNode;
 458                          assert($parentNode !== null, 'For PHPStan, comment must have a parent');
 459                          $parentNode->removeChild($comment);
 460                      }
 461                  }
 462  
 463                  // Strip out HTML tags and attributes that might cause various security problems.
 464                  // Based on recommendations by Mark Pilgrim at:
 465                  // https://web.archive.org/web/20110902041826/http://diveintomark.org:80/archives/2003/06/12/how_to_consume_rss_safely
 466                  if ($this->strip_htmltags) {
 467                      foreach ($this->strip_htmltags as $tag) {
 468                          $this->strip_tag($tag, $document, $xpath, $type);
 469                      }
 470                  }
 471  
 472                  if ($this->rename_attributes) {
 473                      foreach ($this->rename_attributes as $attrib) {
 474                          $this->rename_attr($attrib, $xpath);
 475                      }
 476                  }
 477  
 478                  if ($this->strip_attributes) {
 479                      foreach ($this->strip_attributes as $attrib) {
 480                          $this->strip_attr($attrib, $xpath);
 481                      }
 482                  }
 483  
 484                  if ($this->add_attributes) {
 485                      foreach ($this->add_attributes as $tag => $valuePairs) {
 486                          $this->add_attr($tag, $valuePairs, $document);
 487                      }
 488                  }
 489  
 490                  // Replace relative URLs
 491                  $this->base = $base;
 492                  foreach ($this->replace_url_attributes as $element => $attributes) {
 493                      $this->replace_urls($document, $element, $attributes);
 494                  }
 495  
 496                  // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
 497                  if ($this->image_handler !== '' && $this->enable_cache) {
 498                      $images = $document->getElementsByTagName('img');
 499  
 500                      foreach ($images as $img) {
 501                          if ($img->hasAttribute('src')) {
 502                              $image_url = $this->cache_namefilter->filter($img->getAttribute('src'));
 503                              $cache = $this->get_cache($image_url);
 504  
 505                              if ($cache->get_data($image_url, false)) {
 506                                  $img->setAttribute('src', $this->image_handler . $image_url);
 507                              } else {
 508                                  try {
 509                                      $file = $this->get_http_client()->request(
 510                                          Client::METHOD_GET,
 511                                          $img->getAttribute('src'),
 512                                          ['X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']]
 513                                      );
 514                                  } catch (ClientException $th) {
 515                                      continue;
 516                                  }
 517  
 518                                  if ((!Misc::is_remote_uri($file->get_final_requested_uri()) || ($file->get_status_code() === 200 || $file->get_status_code() > 206 && $file->get_status_code() < 300))) {
 519                                      if ($cache->set_data($image_url, ['headers' => $file->get_headers(), 'body' => $file->get_body_content()], $this->cache_duration)) {
 520                                          $img->setAttribute('src', $this->image_handler . $image_url);
 521                                      } else {
 522                                          trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
 523                                      }
 524                                  }
 525                              }
 526                          }
 527                      }
 528                  }
 529  
 530                  // Get content node
 531                  $div = null;
 532                  if (($item = $document->getElementsByTagName('body')->item(0)) !== null) {
 533                      $div = $item->firstChild;
 534                  }
 535                  // Finally, convert to a HTML string
 536                  $data = trim((string) $document->saveHTML($div));
 537  
 538                  if ($this->remove_div) {
 539                      $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '', $data);
 540                      // Cast for PHPStan, it is unable to validate a non-literal regex above.
 541                      $data = preg_replace('/<\/div>$/', '', (string) $data);
 542                  } else {
 543                      $data = preg_replace('/^<div' . \SimplePie\SimplePie::PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
 544                  }
 545  
 546                  // Cast for PHPStan, it is unable to validate a non-literal regex above.
 547                  $data = str_replace('</source>', '', (string) $data);
 548              }
 549  
 550              if ($type & \SimplePie\SimplePie::CONSTRUCT_IRI) {
 551                  $absolute = $this->registry->call(Misc::class, 'absolutize_url', [$data, $base]);
 552                  if ($absolute !== false) {
 553                      $data = $absolute;
 554                  }
 555              }
 556  
 557              if ($type & (\SimplePie\SimplePie::CONSTRUCT_TEXT | \SimplePie\SimplePie::CONSTRUCT_IRI)) {
 558                  $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
 559              }
 560  
 561              if ($this->output_encoding !== 'UTF-8') {
 562                  // This really returns string|false but changing encoding is uncommon and we are going to deprecate it, so let’s just lie to PHPStan in the interest of cleaner annotations.
 563                  /** @var string */
 564                  $data = $this->registry->call(Misc::class, 'change_encoding', [$data, 'UTF-8', $this->output_encoding]);
 565              }
 566          }
 567          return $data;
 568      }
 569  
 570      /**
 571       * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
 572       * @return string
 573       */
 574      protected function preprocess(string $html, int $type)
 575      {
 576          $ret = '';
 577          $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
 578          if ($type & ~\SimplePie\SimplePie::CONSTRUCT_XHTML) {
 579              // Atom XHTML constructs are wrapped with a div by default
 580              // Note: No protection if $html contains a stray </div>!
 581              $html = '<div>' . $html . '</div>';
 582              $ret .= '<!DOCTYPE html>';
 583              $content_type = 'text/html';
 584          } else {
 585              $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
 586              $content_type = 'application/xhtml+xml';
 587          }
 588  
 589          $ret .= '<html><head>';
 590          $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
 591          $ret .= '</head><body>' . $html . '</body></html>';
 592          return $ret;
 593      }
 594  
 595      /**
 596       * @param array<string>|string $attributes
 597       * @return void
 598       */
 599      public function replace_urls(DOMDocument $document, string $tag, $attributes)
 600      {
 601          if (!is_array($attributes)) {
 602              $attributes = [$attributes];
 603          }
 604  
 605          if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags)) {
 606              $elements = $document->getElementsByTagName($tag);
 607              foreach ($elements as $element) {
 608                  foreach ($attributes as $attribute) {
 609                      if ($element->hasAttribute($attribute)) {
 610                          $value = $this->registry->call(Misc::class, 'absolutize_url', [$element->getAttribute($attribute), $this->base]);
 611                          if ($value !== false) {
 612                              $value = $this->https_url($value);
 613                              $element->setAttribute($attribute, $value);
 614                          }
 615                      }
 616                  }
 617              }
 618          }
 619      }
 620  
 621      /**
 622       * @param array<int, string> $match
 623       * @return string
 624       */
 625      public function do_strip_htmltags(array $match)
 626      {
 627          if ($this->encode_instead_of_strip) {
 628              if (isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'])) {
 629                  $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
 630                  $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
 631                  return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
 632              } else {
 633                  return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
 634              }
 635          } elseif (isset($match[4]) && !in_array(strtolower($match[1]), ['script', 'style'])) {
 636              return $match[4];
 637          } else {
 638              return '';
 639          }
 640      }
 641  
 642      /**
 643       * @param int-mask-of<SimplePie::CONSTRUCT_*> $type
 644       * @return void
 645       */
 646      protected function strip_tag(string $tag, DOMDocument $document, DOMXPath $xpath, int $type)
 647      {
 648          $elements = $xpath->query('body//' . $tag);
 649  
 650          if ($elements === false) {
 651              throw new \SimplePie\Exception(sprintf(
 652                  '%s(): Possibly malformed expression, check argument #1 ($tag)',
 653                  __METHOD__
 654              ), 1);
 655          }
 656  
 657          if ($this->encode_instead_of_strip) {
 658              foreach ($elements as $element) {
 659                  $fragment = $document->createDocumentFragment();
 660  
 661                  // For elements which aren't script or style, include the tag itself
 662                  if (!in_array($tag, ['script', 'style'])) {
 663                      $text = '<' . $tag;
 664                      if ($element->attributes !== null) {
 665                          $attrs = [];
 666                          foreach ($element->attributes as $name => $attr) {
 667                              $value = $attr->value;
 668  
 669                              // In XHTML, empty values should never exist, so we repeat the value
 670                              if (empty($value) && ($type & \SimplePie\SimplePie::CONSTRUCT_XHTML)) {
 671                                  $value = $name;
 672                              }
 673                              // For HTML, empty is fine
 674                              elseif (empty($value) && ($type & \SimplePie\SimplePie::CONSTRUCT_HTML)) {
 675                                  $attrs[] = $name;
 676                                  continue;
 677                              }
 678  
 679                              // Standard attribute text
 680                              $attrs[] = $name . '="' . $attr->value . '"';
 681                          }
 682                          $text .= ' ' . implode(' ', $attrs);
 683                      }
 684                      $text .= '>';
 685                      $fragment->appendChild(new \DOMText($text));
 686                  }
 687  
 688                  $number = $element->childNodes->length;
 689                  for ($i = $number; $i > 0; $i--) {
 690                      if (($child = $element->childNodes->item(0)) !== null) {
 691                          $fragment->appendChild($child);
 692                      }
 693                  }
 694  
 695                  if (!in_array($tag, ['script', 'style'])) {
 696                      $fragment->appendChild(new \DOMText('</' . $tag . '>'));
 697                  }
 698  
 699                  if (($parentNode = $element->parentNode) !== null) {
 700                      $parentNode->replaceChild($fragment, $element);
 701                  }
 702              }
 703  
 704              return;
 705          } elseif (in_array($tag, ['script', 'style'])) {
 706              foreach ($elements as $element) {
 707                  if (($parentNode = $element->parentNode) !== null) {
 708                      $parentNode->removeChild($element);
 709                  }
 710              }
 711  
 712              return;
 713          } else {
 714              foreach ($elements as $element) {
 715                  $fragment = $document->createDocumentFragment();
 716                  $number = $element->childNodes->length;
 717                  for ($i = $number; $i > 0; $i--) {
 718                      if (($child = $element->childNodes->item(0)) !== null) {
 719                          $fragment->appendChild($child);
 720                      }
 721                  }
 722  
 723                  if (($parentNode = $element->parentNode) !== null) {
 724                      $parentNode->replaceChild($fragment, $element);
 725                  }
 726              }
 727          }
 728      }
 729  
 730      /**
 731       * @return void
 732       */
 733      protected function strip_attr(string $attrib, DOMXPath $xpath)
 734      {
 735          $elements = $xpath->query('//*[@' . $attrib . ']');
 736  
 737          if ($elements === false) {
 738              throw new \SimplePie\Exception(sprintf(
 739                  '%s(): Possibly malformed expression, check argument #1 ($attrib)',
 740                  __METHOD__
 741              ), 1);
 742          }
 743  
 744          /** @var \DOMElement $element */
 745          foreach ($elements as $element) {
 746              $element->removeAttribute($attrib);
 747          }
 748      }
 749  
 750      /**
 751       * @return void
 752       */
 753      protected function rename_attr(string $attrib, DOMXPath $xpath)
 754      {
 755          $elements = $xpath->query('//*[@' . $attrib . ']');
 756  
 757          if ($elements === false) {
 758              throw new \SimplePie\Exception(sprintf(
 759                  '%s(): Possibly malformed expression, check argument #1 ($attrib)',
 760                  __METHOD__
 761              ), 1);
 762          }
 763  
 764          /** @var \DOMElement $element */
 765          foreach ($elements as $element) {
 766              $element->setAttribute('data-sanitized-' . $attrib, $element->getAttribute($attrib));
 767              $element->removeAttribute($attrib);
 768          }
 769      }
 770  
 771      /**
 772       * @param array<string, string> $valuePairs
 773       * @return void
 774       */
 775      protected function add_attr(string $tag, array $valuePairs, DOMDocument $document)
 776      {
 777          $elements = $document->getElementsByTagName($tag);
 778          /** @var \DOMElement $element */
 779          foreach ($elements as $element) {
 780              foreach ($valuePairs as $attrib => $value) {
 781                  $element->setAttribute($attrib, $value);
 782              }
 783          }
 784      }
 785  
 786      /**
 787       * Get a DataCache
 788       *
 789       * @param string $image_url Only needed for BC, can be removed in SimplePie 2.0.0
 790       *
 791       * @return DataCache
 792       */
 793      private function get_cache(string $image_url = ''): DataCache
 794      {
 795          if ($this->cache === null) {
 796              // @trigger_error(sprintf('Not providing as PSR-16 cache implementation is deprecated since SimplePie 1.8.0, please use "SimplePie\SimplePie::set_cache()".'), \E_USER_DEPRECATED);
 797              $cache = $this->registry->call(Cache::class, 'get_handler', [
 798                  $this->cache_location,
 799                  $image_url,
 800                  Base::TYPE_IMAGE
 801              ]);
 802  
 803              return new BaseDataCache($cache);
 804          }
 805  
 806          return $this->cache;
 807      }
 808  
 809      /**
 810       * Get a HTTP client
 811       */
 812      private function get_http_client(): Client
 813      {
 814          if ($this->http_client === null) {
 815              $this->http_client = new FileClient(
 816                  $this->registry,
 817                  [
 818                      'timeout' => $this->timeout,
 819                      'redirects' => 5,
 820                      'useragent' => $this->useragent,
 821                      'force_fsockopen' => $this->force_fsockopen,
 822                      'curl_options' => $this->curl_options,
 823                  ]
 824              );
 825          }
 826  
 827          return $this->http_client;
 828      }
 829  }
 830  
 831  class_alias('SimplePie\Sanitize', 'SimplePie_Sanitize');


Generated : Fri Oct 10 08:20:03 2025 Cross-referenced by PHPXref