[ Index ]

PHP Cross Reference of WordPress Trunk (Updated Daily)

Search

title

Body

[close]

/wp-includes/ -> class-wp-block-parser.php (source)

   1  <?php
   2  /**
   3   * Block Serialization Parser
   4   *
   5   * @package WordPress
   6   */
   7  
   8  /**
   9   * Class WP_Block_Parser
  10   *
  11   * Parses a document and constructs a list of parsed block objects
  12   *
  13   * @since 5.0.0
  14   * @since 4.0.0 returns arrays not objects, all attributes are arrays
  15   */
  16  class WP_Block_Parser {
  17      /**
  18       * Input document being parsed
  19       *
  20       * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
  21       *
  22       * @since 5.0.0
  23       * @var string
  24       */
  25      public $document;
  26  
  27      /**
  28       * Tracks parsing progress through document
  29       *
  30       * @since 5.0.0
  31       * @var int
  32       */
  33      public $offset;
  34  
  35      /**
  36       * List of parsed blocks
  37       *
  38       * @since 5.0.0
  39       * @var WP_Block_Parser_Block[]
  40       */
  41      public $output;
  42  
  43      /**
  44       * Stack of partially-parsed structures in memory during parse
  45       *
  46       * @since 5.0.0
  47       * @var WP_Block_Parser_Frame[]
  48       */
  49      public $stack;
  50  
  51      /**
  52       * Parses a document and returns a list of block structures
  53       *
  54       * When encountering an invalid parse will return a best-effort
  55       * parse. In contrast to the specification parser this does not
  56       * return an error on invalid inputs.
  57       *
  58       * @since 5.0.0
  59       *
  60       * @param string $document Input document being parsed.
  61       * @return array[]
  62       */
  63  	public function parse( $document ) {
  64          $this->document = $document;
  65          $this->offset   = 0;
  66          $this->output   = array();
  67          $this->stack    = array();
  68  
  69          while ( $this->proceed() ) {
  70              continue;
  71          }
  72  
  73          return $this->output;
  74      }
  75  
  76      /**
  77       * Processes the next token from the input document
  78       * and returns whether to proceed eating more tokens
  79       *
  80       * This is the "next step" function that essentially
  81       * takes a token as its input and decides what to do
  82       * with that token before descending deeper into a
  83       * nested block tree or continuing along the document
  84       * or breaking out of a level of nesting.
  85       *
  86       * @internal
  87       * @since 5.0.0
  88       * @return bool
  89       */
  90  	public function proceed() {
  91          $next_token = $this->next_token();
  92          list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
  93          $stack_depth = count( $this->stack );
  94  
  95          // we may have some HTML soup before the next block.
  96          $leading_html_start = $start_offset > $this->offset ? $this->offset : null;
  97  
  98          switch ( $token_type ) {
  99              case 'no-more-tokens':
 100                  // if not in a block then flush output.
 101                  if ( 0 === $stack_depth ) {
 102                      $this->add_freeform();
 103                      return false;
 104                  }
 105  
 106                  /*
 107                   * Otherwise we have a problem
 108                   * This is an error
 109                   *
 110                   * we have options
 111                   * - treat it all as freeform text
 112                   * - assume an implicit closer (easiest when not nesting)
 113                   */
 114  
 115                  // for the easy case we'll assume an implicit closer.
 116                  if ( 1 === $stack_depth ) {
 117                      $this->add_block_from_stack();
 118                      return false;
 119                  }
 120  
 121                  /*
 122                   * for the nested case where it's more difficult we'll
 123                   * have to assume that multiple closers are missing
 124                   * and so we'll collapse the whole stack piecewise
 125                   */
 126                  while ( 0 < count( $this->stack ) ) {
 127                      $this->add_block_from_stack();
 128                  }
 129                  return false;
 130  
 131              case 'void-block':
 132                  /*
 133                   * easy case is if we stumbled upon a void block
 134                   * in the top-level of the document
 135                   */
 136                  if ( 0 === $stack_depth ) {
 137                      if ( isset( $leading_html_start ) ) {
 138                          $this->output[] = (array) $this->freeform(
 139                              substr(
 140                                  $this->document,
 141                                  $leading_html_start,
 142                                  $start_offset - $leading_html_start
 143                              )
 144                          );
 145                      }
 146  
 147                      $this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
 148                      $this->offset   = $start_offset + $token_length;
 149                      return true;
 150                  }
 151  
 152                  // otherwise we found an inner block.
 153                  $this->add_inner_block(
 154                      new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
 155                      $start_offset,
 156                      $token_length
 157                  );
 158                  $this->offset = $start_offset + $token_length;
 159                  return true;
 160  
 161              case 'block-opener':
 162                  // track all newly-opened blocks on the stack.
 163                  array_push(
 164                      $this->stack,
 165                      new WP_Block_Parser_Frame(
 166                          new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
 167                          $start_offset,
 168                          $token_length,
 169                          $start_offset + $token_length,
 170                          $leading_html_start
 171                      )
 172                  );
 173                  $this->offset = $start_offset + $token_length;
 174                  return true;
 175  
 176              case 'block-closer':
 177                  /*
 178                   * if we're missing an opener we're in trouble
 179                   * This is an error
 180                   */
 181                  if ( 0 === $stack_depth ) {
 182                      /*
 183                       * we have options
 184                       * - assume an implicit opener
 185                       * - assume _this_ is the opener
 186                       * - give up and close out the document
 187                       */
 188                      $this->add_freeform();
 189                      return false;
 190                  }
 191  
 192                  // if we're not nesting then this is easy - close the block.
 193                  if ( 1 === $stack_depth ) {
 194                      $this->add_block_from_stack( $start_offset );
 195                      $this->offset = $start_offset + $token_length;
 196                      return true;
 197                  }
 198  
 199                  /*
 200                   * otherwise we're nested and we have to close out the current
 201                   * block and add it as a new innerBlock to the parent
 202                   */
 203                  $stack_top                        = array_pop( $this->stack );
 204                  $html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
 205                  $stack_top->block->innerHTML     .= $html;
 206                  $stack_top->block->innerContent[] = $html;
 207                  $stack_top->prev_offset           = $start_offset + $token_length;
 208  
 209                  $this->add_inner_block(
 210                      $stack_top->block,
 211                      $stack_top->token_start,
 212                      $stack_top->token_length,
 213                      $start_offset + $token_length
 214                  );
 215                  $this->offset = $start_offset + $token_length;
 216                  return true;
 217  
 218              default:
 219                  // This is an error.
 220                  $this->add_freeform();
 221                  return false;
 222          }
 223      }
 224  
 225      /**
 226       * Scans the document from where we last left off
 227       * and finds the next valid token to parse if it exists
 228       *
 229       * Returns the type of the find: kind of find, block information, attributes
 230       *
 231       * @internal
 232       * @since 5.0.0
 233       * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
 234       * @return array
 235       */
 236  	public function next_token() {
 237          $matches = null;
 238  
 239          /*
 240           * aye the magic
 241           * we're using a single RegExp to tokenize the block comment delimiters
 242           * we're also using a trick here because the only difference between a
 243           * block opener and a block closer is the leading `/` before `wp:` (and
 244           * a closer has no attributes). we can trap them both and process the
 245           * match back in PHP to see which one it was.
 246           */
 247          $has_match = preg_match(
 248              '/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
 249              $this->document,
 250              $matches,
 251              PREG_OFFSET_CAPTURE,
 252              $this->offset
 253          );
 254  
 255          // if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
 256          if ( false === $has_match ) {
 257              return array( 'no-more-tokens', null, null, null, null );
 258          }
 259  
 260          // we have no more tokens.
 261          if ( 0 === $has_match ) {
 262              return array( 'no-more-tokens', null, null, null, null );
 263          }
 264  
 265          list( $match, $started_at ) = $matches[0];
 266  
 267          $length    = strlen( $match );
 268          $is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
 269          $is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
 270          $namespace = $matches['namespace'];
 271          $namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
 272          $name      = $namespace . $matches['name'][0];
 273          $has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];
 274  
 275          /*
 276           * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
 277           * are associative arrays. If we use `array()` we get a JSON `[]`
 278           */
 279          $attrs = $has_attrs
 280              ? json_decode( $matches['attrs'][0], /* as-associative */ true )
 281              : array();
 282  
 283          /*
 284           * This state isn't allowed
 285           * This is an error
 286           */
 287          if ( $is_closer && ( $is_void || $has_attrs ) ) {
 288              // we can ignore them since they don't hurt anything.
 289          }
 290  
 291          if ( $is_void ) {
 292              return array( 'void-block', $name, $attrs, $started_at, $length );
 293          }
 294  
 295          if ( $is_closer ) {
 296              return array( 'block-closer', $name, null, $started_at, $length );
 297          }
 298  
 299          return array( 'block-opener', $name, $attrs, $started_at, $length );
 300      }
 301  
 302      /**
 303       * Returns a new block object for freeform HTML
 304       *
 305       * @internal
 306       * @since 3.9.0
 307       *
 308       * @param string $inner_html HTML content of block.
 309       * @return WP_Block_Parser_Block freeform block object.
 310       */
 311  	public function freeform( $inner_html ) {
 312          return new WP_Block_Parser_Block( null, array(), array(), $inner_html, array( $inner_html ) );
 313      }
 314  
 315      /**
 316       * Pushes a length of text from the input document
 317       * to the output list as a freeform block.
 318       *
 319       * @internal
 320       * @since 5.0.0
 321       * @param null $length how many bytes of document text to output.
 322       */
 323  	public function add_freeform( $length = null ) {
 324          $length = $length ? $length : strlen( $this->document ) - $this->offset;
 325  
 326          if ( 0 === $length ) {
 327              return;
 328          }
 329  
 330          $this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
 331      }
 332  
 333      /**
 334       * Given a block structure from memory pushes
 335       * a new block to the output list.
 336       *
 337       * @internal
 338       * @since 5.0.0
 339       * @param WP_Block_Parser_Block $block        The block to add to the output.
 340       * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
 341       * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
 342       * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
 343       */
 344  	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
 345          $parent                       = $this->stack[ count( $this->stack ) - 1 ];
 346          $parent->block->innerBlocks[] = (array) $block;
 347          $html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
 348  
 349          if ( ! empty( $html ) ) {
 350              $parent->block->innerHTML     .= $html;
 351              $parent->block->innerContent[] = $html;
 352          }
 353  
 354          $parent->block->innerContent[] = null;
 355          $parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
 356      }
 357  
 358      /**
 359       * Pushes the top block from the parsing stack to the output list.
 360       *
 361       * @internal
 362       * @since 5.0.0
 363       * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
 364       */
 365  	public function add_block_from_stack( $end_offset = null ) {
 366          $stack_top   = array_pop( $this->stack );
 367          $prev_offset = $stack_top->prev_offset;
 368  
 369          $html = isset( $end_offset )
 370              ? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
 371              : substr( $this->document, $prev_offset );
 372  
 373          if ( ! empty( $html ) ) {
 374              $stack_top->block->innerHTML     .= $html;
 375              $stack_top->block->innerContent[] = $html;
 376          }
 377  
 378          if ( isset( $stack_top->leading_html_start ) ) {
 379              $this->output[] = (array) $this->freeform(
 380                  substr(
 381                      $this->document,
 382                      $stack_top->leading_html_start,
 383                      $stack_top->token_start - $stack_top->leading_html_start
 384                  )
 385              );
 386          }
 387  
 388          $this->output[] = (array) $stack_top->block;
 389      }
 390  }
 391  
/**
 * WP_Block_Parser_Block class.
 *
 * WP_Block_Parser instantiates this class for every block it parses, so the
 * file is loaded (once) from the directory this file lives in.
 *
 * Required for backward compatibility in WordPress Core.
 */
require_once  __DIR__ . '/class-wp-block-parser-block.php';

/**
 * WP_Block_Parser_Frame class.
 *
 * WP_Block_Parser pushes an instance of this class onto its stack for every
 * opened block, so the file is loaded (once) from the directory this file lives in.
 *
 * Required for backward compatibility in WordPress Core.
 */
require_once  __DIR__ . '/class-wp-block-parser-frame.php';


Generated : Tue Apr 16 08:20:01 2024 Cross-referenced by PHPXref