[ Index ] |
PHP Cross Reference of WordPress Trunk (Updated Daily) |
[Summary view] [Print] [Text view]
<?php
/**
 * Block Serialization Parser
 *
 * @package WordPress
 */

/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block objects
 *
 * @since 5.0.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * Byte offset of the first byte of the document that has not
	 * been consumed by a token yet.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks
	 *
	 * Every entry is a block object cast to an array before being stored.
	 *
	 * @since 5.0.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		// Reset all parser state so one instance can parse several documents.
		$this->document = $document;
		$this->offset   = 0;
		$this->output   = array();
		$this->stack    = array();

		// Consume tokens one at a time until proceed() signals the end.
		while ( $this->proceed() ) {
			continue;
		}

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True when parsing should continue, false when it is finished.
	 */
	public function proceed() {
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $this->next_token();

		$stack_depth = count( $this->stack );

		// We may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// If not in a block then flush the remaining text as freeform output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise one or more closers are missing, which is an error.
				 * Assume implicit closers and collapse the whole stack piecewise:
				 * every open block is moved to the output list, innermost first.
				 * With a single open block this runs exactly once.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * Easy case is if we stumbled upon a void block
				 * in the top-level of the document.
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// Otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// Track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * If we're missing an opener we're in trouble; this is an error.
				 * Of the available options (assume an implicit opener, assume
				 * this is the opener, give up) we give up and emit the rest of
				 * the document as freeform text.
				 */
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				// If we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * Otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent.
				 */
				$stack_top = array_pop( $this->stack );
				$html      = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );

				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error: unknown token type.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The returned list is `array( $type, $block_name, $attrs, $start_offset, $length )`
	 * where `$type` is one of 'no-more-tokens', 'void-block', 'block-opener'
	 * or 'block-closer'. For 'no-more-tokens' every other entry is null.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * Aye the magic:
		 * we're using a single RegExp to tokenize the block comment delimiters.
		 * We're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). We can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * `false` means the PCRE engine failed (probably catastrophic
		 * backtracking or out-of-memory); `0` means there are no more tokens.
		 * Either way there is nothing further to tokenize.
		 */
		if ( false === $has_match || 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		// With PREG_OFFSET_CAPTURE an unmatched group reports an offset of -1.
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 *
		 * Note that json_decode() yields null when the captured text is not valid JSON.
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: array();

		/*
		 * A closer carrying attributes or a void marker isn't allowed and is
		 * technically an error, but we can ignore them since they don't hurt
		 * anything: the void check below simply takes precedence.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $inner_html HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $inner_html ) {
		return new WP_Block_Parser_Block( null, array(), array(), $inner_html, array( $inner_html ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output.
	 *                         Defaults to everything from the current offset
	 *                         to the end of the document.
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		// Nothing left to emit.
		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the output list.
	 *
	 * The block is appended to the inner blocks of the frame currently on
	 * top of the stack, together with any HTML found between the parent's
	 * previous offset and the start of the block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string explicitly: `empty()` also treats
		 * the string "0" as empty, which would silently drop such content.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		// A null entry marks the position of the inner block within the content.
		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any HTML that preceded the block's opening token is emitted first
	 * as its own freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		// Without an end offset the block swallows the rest of the document.
		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string explicitly: `empty()` also treats
		 * the string "0" as empty, which would silently drop such content.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}

/**
 * WP_Block_Parser_Block class.
 *
 * Required for backward compatibility in WordPress Core.
 */
require_once __DIR__ . '/class-wp-block-parser-block.php';

/**
 * WP_Block_Parser_Frame class.
 *
 * Required for backward compatibility in WordPress Core.
 */
require_once __DIR__ . '/class-wp-block-parser-frame.php';
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated : Thu Nov 21 08:20:01 2024 | Cross-referenced by PHPXref |