| [ Index ] |
PHP Cross Reference of WordPress Trunk (Updated Daily) |
[Summary view] [Print] [Text view]
<?php
/**
 * kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
 * Copyright (C) 2002, 2003, 2005  Ulf Harnhammar
 *
 * This program is free software and open source software; you can redistribute
 * it and/or modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 * http://www.gnu.org/licenses/gpl.html
 *
 * [kses strips evil scripts!]
 *
 * Added wp_ prefix to avoid conflicts with existing kses users
 *
 * @version 0.2.2
 * @copyright (C) 2002, 2003, 2005
 * @author Ulf Harnhammar <http://advogato.org/person/metaur/>
 *
 * @package External
 * @subpackage KSES
 */

/**
 * Specifies the default allowable HTML tags.
 *
 * Using `CUSTOM_TAGS` is not recommended and should be considered deprecated. The
 * {@see 'wp_kses_allowed_html'} filter is more powerful and supplies context.
 *
 * When using this constant, make sure to set all of these globals to arrays:
 *
 *  - `$allowedposttags`
 *  - `$allowedtags`
 *  - `$allowedentitynames`
 *  - `$allowedxmlentitynames`
 *
 * @see wp_kses_allowed_html()
 * @since 1.2.0
 *
 * @var array[]|false Array of default allowable HTML tags, or false to use the defaults.
 */
if ( false === defined( 'CUSTOM_TAGS' ) ) {
	define( 'CUSTOM_TAGS', false );
}

// Ensure that these variables are added to the global namespace
// (e.g. if using namespaces / autoload in the current PHP environment).
global $allowedposttags, $allowedtags, $allowedentitynames, $allowedxmlentitynames;

if ( CUSTOM_TAGS ) {
	/*
	 * Custom tags were requested: the site is expected to have populated the
	 * KSES globals itself. Report every global that is unset or not an array,
	 * then lowercase the two tag lists.
	 */
	$required_kses_globals = array(
		'allowedposttags',
		'allowedtags',
		'allowedentitynames',
		'allowedxmlentitynames',
	);
	$missing_kses_globals  = array();

	foreach ( $required_kses_globals as $global_name ) {
		if ( isset( $GLOBALS[ $global_name ] ) && is_array( $GLOBALS[ $global_name ] ) ) {
			continue;
		}

		$missing_kses_globals[] = sprintf( '<code>$%s</code>', $global_name );
	}

	if ( count( $missing_kses_globals ) > 0 ) {
		_doing_it_wrong(
			'wp_kses_allowed_html',
			sprintf(
				/* translators: 1: CUSTOM_TAGS, 2: Global variable names. */
				__( 'When using the %1$s constant, make sure to set these globals to an array: %2$s.' ),
				'<code>CUSTOM_TAGS</code>',
				implode( ', ', $missing_kses_globals )
			),
			'6.2.0'
		);
	}

	$allowedtags     = wp_kses_array_lc( $allowedtags );
	$allowedposttags = wp_kses_array_lc( $allowedposttags );
} else {
	/**
	 * KSES global for default allowable HTML tags.
	 *
	 * Can be overridden with the `CUSTOM_TAGS` constant.
	 *
	 * @var array[] $allowedposttags Array of default allowable HTML tags.
	 * @since 2.0.0
	 */
	$allowedposttags = array(
		'address' => array(),
		'a' => array(
			'href' => true,
			'rel' => true,
			'rev' => true,
			'name' => true,
			'target' => true,
			'download' => array( 'valueless' => 'y' ),
		),
		'abbr' => array(),
		'acronym' => array(),
		'area' => array(
			'alt' => true,
			'coords' => true,
			'href' => true,
			'nohref' => true,
			'shape' => true,
			'target' => true,
		),
		'article' => array( 'align' => true ),
		'aside' => array( 'align' => true ),
		'audio' => array(
			'autoplay' => true,
			'controls' => true,
			'loop' => true,
			'muted' => true,
			'preload' => true,
			'src' => true,
		),
		'b' => array(),
		'bdo' => array(),
		'big' => array(),
		'blockquote' => array( 'cite' => true ),
		'br' => array(),
		'button' => array(
			'disabled' => true,
			'name' => true,
			'type' => true,
			'value' => true,
			'popovertarget' => true,
			'popovertargetaction' => true,
			'aria-haspopup' => true,
		),
		'caption' => array( 'align' => true ),
		'cite' => array(),
		'code' => array(),
		'col' => array(
			'align' => true,
			'char' => true,
			'charoff' => true,
			'span' => true,
			'valign' => true,
			'width' => true,
		),
		'colgroup' => array(
			'align' => true,
			'char' => true,
			'charoff' => true,
			'span' => true,
			'valign' => true,
			'width' => true,
		),
		'data' => array( 'value' => true ),
		'del' => array( 'datetime' => true ),
		'dd' => array(),
		'dfn' => array(),
		'details' => array(
			'align' => true,
			'open' => true,
			'name' => true,
		),
		'div' => array(
			'align' => true,
			'popover' => true,
		),
		'dialog' => array(
			'closedby' => true,
			'open' => true,
			'popover' => true,
		),
		'dl' => array(),
		'dt' => array(),
		'em' => array(),
		'fieldset' => array(),
		'figure' => array( 'align' => true ),
		'figcaption' => array( 'align' => true ),
		'font' => array(
			'color' => true,
			'face' => true,
			'size' => true,
		),
		'footer' => array( 'align' => true ),
		'h1' => array( 'align' => true ),
		'h2' => array( 'align' => true ),
		'h3' => array( 'align' => true ),
		'h4' => array( 'align' => true ),
		'h5' => array( 'align' => true ),
		'h6' => array( 'align' => true ),
		'header' => array( 'align' => true ),
		'hgroup' => array( 'align' => true ),
		'hr' => array(
			'align' => true,
			'noshade' => true,
			'size' => true,
			'width' => true,
		),
		'i' => array(),
		'img' => array(
			'alt' => true,
			'align' => true,
			'border' => true,
			'height' => true,
			'hspace' => true,
			'loading' => true,
			'longdesc' => true,
			'vspace' => true,
			'src' => true,
			'usemap' => true,
			'width' => true,
		),
		'ins' => array(
			'datetime' => true,
			'cite' => true,
		),
		'kbd' => array(),
		'label' => array( 'for' => true ),
		'legend' => array( 'align' => true ),
		'li' => array(
			'align' => true,
			'value' => true,
		),
		'main' => array( 'align' => true ),
		'map' => array( 'name' => true ),
		'mark' => array(),
		'menu' => array( 'type' => true ),
		'meter' => array(
			'high' => true,
			'low' => true,
			'max' => true,
			'min' => true,
			'optimum' => true,
			'value' => true,
		),
		'nav' => array( 'align' => true ),
		'object' => array(
			'data' => array(
				'required' => true,
				'value_callback' => '_wp_kses_allow_pdf_objects',
			),
			'type' => array(
				'required' => true,
				'values' => array( 'application/pdf' ),
			),
		),
		'p' => array( 'align' => true ),
		'pre' => array( 'width' => true ),
		'progress' => array(
			'max' => true,
			'value' => true,
		),
		'q' => array( 'cite' => true ),
		'rb' => array(),
		'rp' => array(),
		'rt' => array(),
		'rtc' => array(),
		'ruby' => array(),
		's' => array(),
		'samp' => array(),
		'search' => array(),
		'span' => array( 'align' => true ),
		'section' => array( 'align' => true ),
		'small' => array(),
		'strike' => array(),
		'strong' => array(),
		'sub' => array(),
		'summary' => array( 'align' => true ),
		'sup' => array(),
		'table' => array(
			'align' => true,
			'bgcolor' => true,
			'border' => true,
			'cellpadding' => true,
			'cellspacing' => true,
			'rules' => true,
			'summary' => true,
			'width' => true,
		),
		'tbody' => array(
			'align' => true,
			'char' => true,
			'charoff' => true,
			'valign' => true,
		),
		'td' => array(
			'abbr' => true,
			'align' => true,
			'axis' => true,
			'bgcolor' => true,
			'char' => true,
			'charoff' => true,
			'colspan' => true,
			'headers' => true,
			'height' => true,
			'nowrap' => true,
			'rowspan' => true,
			'scope' => true,
			'valign' => true,
			'width' => true,
		),
		'textarea' => array(
			'cols' => true,
			'rows' => true,
			'disabled' => true,
			'name' => true,
			'readonly' => true,
		),
		'tfoot' => array(
			'align' => true,
			'char' => true,
			'charoff' => true,
			'valign' => true,
		),
		'th' => array(
			'abbr' => true,
			'align' => true,
			'axis' => true,
			'bgcolor' => true,
			'char' => true,
			'charoff' => true,
			'colspan' => true,
			'headers' => true,
			'height' => true,
			'nowrap' => true,
			'rowspan' => true,
			'scope' => true,
			'valign' => true,
			'width' => true,
		),
		'thead' => array(
			'align' => true,
			'char' => true,
			'charoff' => true,
			'valign' => true,
		),
		'time' => array( 'datetime' => true ),
		'title' => array(),
		'tr' => array(
			'align' => true,
			'bgcolor' => true,
			'char' => true,
			'charoff' => true,
			'valign' => true,
		),
		'track' => array(
			'default' => true,
			'kind' => true,
			'label' => true,
			'src' => true,
			'srclang' => true,
		),
		'tt' => array(),
		'u' => array(),
		'ul' => array(
			'type' => true,
			'popover' => true,
			'role' => true,
		),
		'ol' => array(
			'start' => true,
			'type' => true,
			'reversed' => true,
		),
		'var' => array(),
		'video' => array(
			'autoplay' => true,
			'controls' => true,
			'height' => true,
			'loop' => true,
			'muted' => true,
			'playsinline' => true,
			'poster' => true,
			'preload' => true,
			'src' => true,
			'width' => true,
		),
		'wbr' => array(),
	);

	// https://www.w3.org/TR/mathml-core/#global-attributes
	// Except common attributes added by _wp_add_global_attributes.
	$math_global_attributes = array(
		'displaystyle' => true,
		'scriptlevel' => true,
		'mathbackground' => true,
		'mathcolor' => true,
		'mathsize' => true,
		// Common attributes also defined by _wp_add_global_attributes.
		// We do not want to add all those global attributes though.
		'class' => true,
		'data-*' => true,
		'dir' => true,
		'id' => true,
		'style' => true,
	);

	// Attributes shared by the munder / mover / munderover elements below.
	$math_overunder_attributes = array(
		'accentunder' => true,
		'accent' => true,
	);

	// Append the MathML elements to the HTML allow list built above.
	$allowedposttags = array_merge(
		$allowedposttags,
		array(
			// https://www.w3.org/TR/mathml-core/#the-top-level-math-element
			'math' => array_merge( $math_global_attributes, array( 'display' => true ) ),

			// https://www.w3.org/TR/mathml-core/#token-elements
			// https://www.w3.org/TR/mathml-core/#text-mtext
			'mtext' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#the-mi-element
			'mi' => array_merge( $math_global_attributes, array( 'mathvariant' => true ) ),
			// https://www.w3.org/TR/mathml-core/#number-mn
			'mn' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#operator-fence-separator-or-accent-mo
			'mo' => array_merge(
				$math_global_attributes,
				array(
					'form' => true,
					'fence' => true,
					'separator' => true,
					'lspace' => true,
					'rspace' => true,
					'stretchy' => true,
					'symmetric' => true,
					'maxsize' => true,
					'minsize' => true,
					'largeop' => true,
					'movablelimits' => true,
				)
			),
			// https://www.w3.org/TR/mathml-core/#space-mspace
			'mspace' => array_merge(
				$math_global_attributes,
				array(
					'width' => true,
					'height' => true,
					'depth' => true,
				)
			),
			// https://www.w3.org/TR/mathml-core/#string-literal-ms
			'ms' => $math_global_attributes,

			// https://www.w3.org/TR/mathml-core/#general-layout-schemata
			// https://www.w3.org/TR/mathml-core/#horizontally-group-sub-expressions-mrow
			'mrow' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#fractions-mfrac
			'mfrac' => array_merge( $math_global_attributes, array( 'linethickness' => true ) ),
			// https://www.w3.org/TR/mathml-core/#radicals-msqrt-mroot
			'msqrt' => $math_global_attributes,
			'mroot' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#style-change-mstyle
			'mstyle' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#error-message-merror
			'merror' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#adjust-space-around-content-mpadded
			'mpadded' => array_merge(
				$math_global_attributes,
				array(
					'width' => true,
					'height' => true,
					'depth' => true,
					'lspace' => true,
					'voffset' => true,
				)
			),
			// https://www.w3.org/TR/mathml-core/#making-sub-expressions-invisible-mphantom
			'mphantom' => $math_global_attributes,

			// https://www.w3.org/TR/mathml-core/#script-and-limit-schemata
			// https://www.w3.org/TR/mathml-core/#subscripts-and-superscripts-msub-msup-msubsup
			'msub' => $math_global_attributes,
			'msup' => $math_global_attributes,
			'msubsup' => $math_global_attributes,
			// https://www.w3.org/TR/mathml-core/#underscripts-and-overscripts-munder-mover-munderover
			'munder' => array_merge( $math_global_attributes, $math_overunder_attributes ),
			'mover' => array_merge( $math_global_attributes, $math_overunder_attributes ),
			'munderover' => array_merge( $math_global_attributes, $math_overunder_attributes ),
			// https://www.w3.org/TR/mathml-core/#prescripts-and-tensor-indices-mmultiscripts
			'mmultiscripts' => $math_global_attributes,
			'mprescripts' => $math_global_attributes,

			// https://www.w3.org/TR/mathml-core/#tabular-math
			// https://www.w3.org/TR/mathml-core/#table-or-matrix-mtable
			'mtable' => array_merge(
				$math_global_attributes,
				array(
					// Non-standard, used by temml/katex.
					// https://developer.mozilla.org/en-US/docs/Web/MathML/Reference/Element/mtable
					'columnalign' => true,
					'rowspacing' => true,
					'columnspacing' => true,
					'align' => true,
					'rowalign' => true,
					'columnlines' => true,
					'rowlines' => true,
					'frame' => true,
					'framespacing' => true,
					'width' => true,
				)
			),
			// https://www.w3.org/TR/mathml-core/#row-in-table-or-matrix-mtr
			'mtr' => array_merge(
				$math_global_attributes,
				array(
					// Non-standard, used by temml/katex.
					// https://developer.mozilla.org/en-US/docs/Web/MathML/Reference/Element/mtr
					'columnalign' => true,
					'rowalign' => true,
				)
			),
			// https://www.w3.org/TR/mathml-core/#entry-in-table-or-matrix-mtd
			'mtd' => array_merge(
				$math_global_attributes,
				array(
					'columnspan' => true,
					'rowspan' => true,
					// Non-standard, used by temml/katex.
					// https://developer.mozilla.org/en-US/docs/Web/MathML/Reference/Element/mtd
					'columnalign' => true,
					'rowalign' => true,
				)
			),

			// https://www.w3.org/TR/mathml-core/#semantics-and-presentation
			'semantics' => $math_global_attributes,
			'annotation' => array_merge( $math_global_attributes, array( 'encoding' => true ) ),

			// Non-standard but widely supported, used by temml/katex.
			'menclose' => array_merge( $math_global_attributes, array( 'notation' => true ) ),
		)
	);

	/**
	 * @var array[] $allowedtags Array of KSES allowed HTML elements.
	 * @since 1.0.0
	 */
	$allowedtags = array(
		'a' => array(
			'href' => true,
			'title' => true,
		),
		'abbr' => array( 'title' => true ),
		'acronym' => array( 'title' => true ),
		'b' => array(),
		'blockquote' => array( 'cite' => true ),
		'cite' => array(),
		'code' => array(),
		'del' => array( 'datetime' => true ),
		'em' => array(),
		'i' => array(),
		'q' => array( 'cite' => true ),
		's' => array(),
		'strike' => array(),
		'strong' => array(),
	);

	/**
	 * @var string[] $allowedentitynames Array of KSES allowed HTML entity names.
	 * @since 1.0.0
	 */
	$allowedentitynames = array(
		'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
		'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
		'deg', 'plusmn', 'acute', 'micro', 'para', 'middot', 'cedil', 'ordm',
		'raquo', 'iquest', 'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring',
		'AElig', 'Ccedil', 'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute',
		'Icirc', 'Iuml', 'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde',
		'Ouml', 'times', 'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute',
		'THORN', 'szlig', 'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring',
		'aelig', 'ccedil', 'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute',
		'icirc', 'iuml', 'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde',
		'ouml', 'divide', 'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute',
		'thorn', 'yuml', 'quot', 'amp', 'lt', 'gt', 'apos', 'OElig',
		'oelig', 'Scaron', 'scaron', 'Yuml', 'circ', 'tilde', 'ensp', 'emsp',
		'thinsp', 'zwnj', 'zwj', 'lrm', 'rlm', 'ndash', 'mdash', 'lsquo',
		'rsquo', 'sbquo', 'ldquo', 'rdquo', 'bdquo', 'dagger', 'Dagger', 'permil',
		'lsaquo', 'rsaquo', 'euro', 'fnof', 'Alpha', 'Beta', 'Gamma', 'Delta',
		'Epsilon', 'Zeta', 'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
		'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma', 'Tau', 'Upsilon',
		'Phi', 'Chi', 'Psi', 'Omega', 'alpha', 'beta', 'gamma', 'delta',
		'epsilon', 'zeta', 'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
		'nu', 'xi', 'omicron', 'pi', 'rho', 'sigmaf', 'sigma', 'tau',
		'upsilon', 'phi', 'chi', 'psi', 'omega', 'thetasym', 'upsih', 'piv',
		'bull', 'hellip', 'prime', 'Prime', 'oline', 'frasl', 'weierp', 'image',
		'real', 'trade', 'alefsym', 'larr', 'uarr', 'rarr', 'darr', 'harr',
		'crarr', 'lArr', 'uArr', 'rArr', 'dArr', 'hArr', 'forall', 'part',
		'exist', 'empty', 'nabla', 'isin', 'notin', 'ni', 'prod', 'sum',
		'minus', 'lowast', 'radic', 'prop', 'infin', 'ang', 'and', 'or',
		'cap', 'cup', 'int', 'sim', 'cong', 'asymp', 'ne', 'equiv',
		'le', 'ge', 'sub', 'sup', 'nsub', 'sube', 'supe', 'oplus',
		'otimes', 'perp', 'sdot', 'lceil', 'rceil', 'lfloor', 'rfloor', 'lang',
		'rang', 'loz', 'spades', 'clubs', 'hearts', 'diams', 'sup1', 'sup2',
		'sup3', 'frac14', 'frac12', 'frac34', 'there4',
	);

	/**
	 * @var string[] $allowedxmlentitynames Array of KSES allowed XML entity names.
	 * @since 5.5.0
	 */
	$allowedxmlentitynames = array( 'amp', 'lt', 'gt', 'apos', 'quot' );

	// Run every element's attribute list through _wp_add_global_attributes().
	$allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags );
}

/**
 * Filters text content and strips out disallowed HTML.
 *
 * This function makes sure that only the allowed HTML element names, attribute
 * names, attribute values, and HTML entities will occur in the given text string.
 *
 * This function expects unslashed data.
 *
 * @see wp_kses_post() for specifically filtering post content and fields.
 * @see wp_allowed_protocols() for the default allowed protocols in link URLs.
 *
 * @since 1.0.0
 *
 * @param string         $content           Text content to filter.
 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
 *                                          for the list of accepted context names.
 * @param string[]       $allowed_protocols Optional. Array of allowed URL protocols.
955 * Defaults to the result of wp_allowed_protocols(). 956 * @return string Filtered content containing only the allowed HTML. 957 */ 958 function wp_kses( $content, $allowed_html, $allowed_protocols = array() ) { 959 if ( empty( $allowed_protocols ) ) { 960 $allowed_protocols = wp_allowed_protocols(); 961 } 962 963 $content = wp_kses_no_null( $content, array( 'slash_zero' => 'keep' ) ); 964 $content = wp_kses_normalize_entities( $content ); 965 $content = wp_kses_hook( $content, $allowed_html, $allowed_protocols ); 966 967 return wp_kses_split( $content, $allowed_html, $allowed_protocols ); 968 } 969 970 /** 971 * Filters one HTML attribute and ensures its value is allowed. 972 * 973 * This function can escape data in some situations where `wp_kses()` must strip the whole attribute. 974 * 975 * @since 4.2.3 976 * 977 * @param string $attr The 'whole' attribute, including name and value. 978 * @param string $element The HTML element name to which the attribute belongs. 979 * @return string Filtered attribute. 980 */ 981 function wp_kses_one_attr( $attr, $element ) { 982 $uris = wp_kses_uri_attributes(); 983 $allowed_html = wp_kses_allowed_html( 'post' ); 984 $allowed_protocols = wp_allowed_protocols(); 985 $attr = wp_kses_no_null( $attr, array( 'slash_zero' => 'keep' ) ); 986 987 // Preserve leading and trailing whitespace. 988 $matches = array(); 989 preg_match( '/^\s*/', $attr, $matches ); 990 $lead = $matches[0]; 991 preg_match( '/\s*$/', $attr, $matches ); 992 $trail = $matches[0]; 993 if ( empty( $trail ) ) { 994 $attr = substr( $attr, strlen( $lead ) ); 995 } else { 996 $attr = substr( $attr, strlen( $lead ), -strlen( $trail ) ); 997 } 998 999 // Parse attribute name and value from input. 1000 $split = preg_split( '/\s*=\s*/', $attr, 2 ); 1001 $name = $split[0]; 1002 if ( count( $split ) === 2 ) { 1003 $value = $split[1]; 1004 1005 /* 1006 * Remove quotes surrounding $value. 1007 * Also guarantee correct quoting in $attr for this one attribute. 
1008 */ 1009 if ( '' === $value ) { 1010 $quote = ''; 1011 } else { 1012 $quote = $value[0]; 1013 } 1014 if ( '"' === $quote || "'" === $quote ) { 1015 if ( ! str_ends_with( $value, $quote ) ) { 1016 return ''; 1017 } 1018 $value = substr( $value, 1, -1 ); 1019 } else { 1020 $quote = '"'; 1021 } 1022 1023 // Sanitize quotes, angle braces, and entities. 1024 $value = esc_attr( $value ); 1025 1026 // Sanitize URI values. 1027 if ( in_array( strtolower( $name ), $uris, true ) ) { 1028 $value = wp_kses_bad_protocol( $value, $allowed_protocols ); 1029 } 1030 1031 $attr = "$name=$quote$value$quote"; 1032 $vless = 'n'; 1033 } else { 1034 $value = ''; 1035 $vless = 'y'; 1036 } 1037 1038 // Sanitize attribute by name. 1039 wp_kses_attr_check( $name, $value, $attr, $vless, $element, $allowed_html ); 1040 1041 // Restore whitespace. 1042 return $lead . $attr . $trail; 1043 } 1044 1045 /** 1046 * Returns an array of allowed HTML tags and attributes for a given context. 1047 * 1048 * @since 3.5.0 1049 * @since 5.0.1 `form` removed as allowable HTML tag. 1050 * 1051 * @global array $allowedposttags 1052 * @global array $allowedtags 1053 * @global array $allowedentitynames 1054 * 1055 * @param string|array $context The context for which to retrieve tags. Allowed values are 'post', 1056 * 'strip', 'data', 'entities', or the name of a field filter such as 1057 * 'pre_user_description', or an array of allowed HTML elements and attributes. 1058 * @return array Array of allowed HTML tags and their allowed attributes. 1059 */ 1060 function wp_kses_allowed_html( $context = '' ) { 1061 global $allowedposttags, $allowedtags, $allowedentitynames; 1062 1063 if ( is_array( $context ) ) { 1064 // When `$context` is an array it's actually an array of allowed HTML elements and attributes. 1065 $html = $context; 1066 $context = 'explicit'; 1067 1068 /** 1069 * Filters the HTML tags that are allowed for a given context. 
1070 * 1071 * HTML tags and attribute names are case-insensitive in HTML but must be 1072 * added to the KSES allow list in lowercase. An item added to the allow list 1073 * in upper or mixed case will not recognized as permitted by KSES. 1074 * 1075 * @since 3.5.0 1076 * 1077 * @param array[] $html Allowed HTML tags. 1078 * @param string $context Context name. 1079 */ 1080 return apply_filters( 'wp_kses_allowed_html', $html, $context ); 1081 } 1082 1083 switch ( $context ) { 1084 case 'post': 1085 /** This filter is documented in wp-includes/kses.php */ 1086 $tags = apply_filters( 'wp_kses_allowed_html', $allowedposttags, $context ); 1087 1088 // 5.0.1 removed the `<form>` tag, allow it if a filter is allowing it's sub-elements `<input>` or `<select>`. 1089 if ( ! CUSTOM_TAGS && ! isset( $tags['form'] ) && ( isset( $tags['input'] ) || isset( $tags['select'] ) ) ) { 1090 $tags = $allowedposttags; 1091 1092 $tags['form'] = array( 1093 'action' => true, 1094 'accept' => true, 1095 'accept-charset' => true, 1096 'enctype' => true, 1097 'method' => true, 1098 'name' => true, 1099 'target' => true, 1100 ); 1101 1102 /** This filter is documented in wp-includes/kses.php */ 1103 $tags = apply_filters( 'wp_kses_allowed_html', $tags, $context ); 1104 } 1105 1106 return $tags; 1107 1108 case 'user_description': 1109 case 'pre_term_description': 1110 case 'pre_user_description': 1111 $tags = $allowedtags; 1112 $tags['a']['rel'] = true; 1113 $tags['a']['target'] = true; 1114 /** This filter is documented in wp-includes/kses.php */ 1115 return apply_filters( 'wp_kses_allowed_html', $tags, $context ); 1116 1117 case 'strip': 1118 /** This filter is documented in wp-includes/kses.php */ 1119 return apply_filters( 'wp_kses_allowed_html', array(), $context ); 1120 1121 case 'entities': 1122 /** This filter is documented in wp-includes/kses.php */ 1123 return apply_filters( 'wp_kses_allowed_html', $allowedentitynames, $context ); 1124 1125 case 'data': 1126 default: 1127 /** This 
filter is documented in wp-includes/kses.php */ 1128 return apply_filters( 'wp_kses_allowed_html', $allowedtags, $context ); 1129 } 1130 } 1131 1132 /** 1133 * You add any KSES hooks here. 1134 * 1135 * There is currently only one KSES WordPress hook, {@see 'pre_kses'}, and it is called here. 1136 * All parameters are passed to the hooks and expected to receive a string. 1137 * 1138 * @since 1.0.0 1139 * 1140 * @param string $content Content to filter through KSES. 1141 * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, 1142 * or a context name such as 'post'. See wp_kses_allowed_html() 1143 * for the list of accepted context names. 1144 * @param string[] $allowed_protocols Array of allowed URL protocols. 1145 * @return string Filtered content through {@see 'pre_kses'} hook. 1146 */ 1147 function wp_kses_hook( $content, $allowed_html, $allowed_protocols ) { 1148 /** 1149 * Filters content to be run through KSES. 1150 * 1151 * @since 2.3.0 1152 * 1153 * @param string $content Content to filter through KSES. 1154 * @param array[]|string $allowed_html An array of allowed HTML elements and attributes, 1155 * or a context name such as 'post'. See wp_kses_allowed_html() 1156 * for the list of accepted context names. 1157 * @param string[] $allowed_protocols Array of allowed URL protocols. 1158 */ 1159 return apply_filters( 'pre_kses', $content, $allowed_html, $allowed_protocols ); 1160 } 1161 1162 /** 1163 * Returns the version number of KSES. 1164 * 1165 * @since 1.0.0 1166 * 1167 * @return string KSES version number. 1168 */ 1169 function wp_kses_version() { 1170 return '0.2.2'; 1171 } 1172 1173 /** 1174 * Searches for HTML tags, no matter how malformed. 1175 * 1176 * It also matches stray `>` characters. 1177 * 1178 * @since 1.0.0 1179 * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments. 
 *
 * @global array[]|string $pass_allowed_html      An array of allowed HTML elements and attributes,
 *                                                or a context name such as 'post'.
 * @global string[]       $pass_allowed_protocols Array of allowed URL protocols.
 *
 * @param string         $content           Content to filter.
 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
 *                                          for the list of accepted context names.
 * @param string[]       $allowed_protocols Array of allowed URL protocols.
 * @return string Content with fixed HTML tags
 */
function wp_kses_split( $content, $allowed_html, $allowed_protocols ) {
	global $pass_allowed_html, $pass_allowed_protocols;

	/*
	 * The replacement callback is referenced by name and only receives the regex
	 * matches, so the allow list and protocols are handed over through globals.
	 */
	$pass_allowed_html      = $allowed_html;
	$pass_allowed_protocols = $allowed_protocols;

	// Extended-mode (`x`) pattern: whitespace and `#` comments inside it are ignored by PCRE.
	$token_pattern = <<<REGEX
~
	(                      # Detect comments of various flavors before attempting to find tags.
		(<!--.*?(-->|$))   #  - Normative HTML comments.
		|
		</[^a-zA-Z][^>]*>  #  - Closing tags with invalid tag names.
		|
		<![^>]*>           #  - Invalid markup declaration nodes. Not all invalid nodes
		                   #    are matched so as to avoid breaking legacy behaviors.
	)
	|
	(<[^>]*(>|$)|>)        # Tag-like spans of text.
~x
REGEX;
	return preg_replace_callback( $token_pattern, '_wp_kses_split_callback', $content );
}

/**
 * Returns an array of HTML attribute names whose value contains a URL.
 *
 * This function returns a list of all HTML attributes that must contain
 * a URL according to the HTML specification.
 *
 * This list includes URI attributes both allowed and disallowed by KSES.
 *
 * @link https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes
 *
 * @since 5.0.1
 *
 * @return string[] HTML attribute names whose value contains a URL.
1228 */ 1229 function wp_kses_uri_attributes() { 1230 $uri_attributes = array( 1231 'action', 1232 'archive', 1233 'background', 1234 'cite', 1235 'classid', 1236 'codebase', 1237 'data', 1238 'formaction', 1239 'href', 1240 'icon', 1241 'longdesc', 1242 'manifest', 1243 'poster', 1244 'profile', 1245 'src', 1246 'usemap', 1247 'xmlns', 1248 ); 1249 1250 /** 1251 * Filters the list of attributes that are required to contain a URL. 1252 * 1253 * Use this filter to add any `data-` attributes that are required to be 1254 * validated as a URL. 1255 * 1256 * @since 5.0.1 1257 * 1258 * @param string[] $uri_attributes HTML attribute names whose value contains a URL. 1259 */ 1260 $uri_attributes = apply_filters( 'wp_kses_uri_attributes', $uri_attributes ); 1261 1262 return $uri_attributes; 1263 } 1264 1265 /** 1266 * Callback for `wp_kses_split()`. 1267 * 1268 * @since 3.1.0 1269 * @access private 1270 * @ignore 1271 * 1272 * @global array[]|string $pass_allowed_html An array of allowed HTML elements and attributes, 1273 * or a context name such as 'post'. 1274 * @global string[] $pass_allowed_protocols Array of allowed URL protocols. 1275 * 1276 * @param array $matches preg_replace regexp matches 1277 * @return string 1278 */ 1279 function _wp_kses_split_callback( $matches ) { 1280 global $pass_allowed_html, $pass_allowed_protocols; 1281 1282 return wp_kses_split2( $matches[0], $pass_allowed_html, $pass_allowed_protocols ); 1283 } 1284 1285 /** 1286 * Callback for `wp_kses_split()` for fixing malformed HTML tags. 1287 * 1288 * This function does a lot of work. It rejects some very malformed things like 1289 * `<:::>`. It returns an empty string, if the element isn't allowed (look ma, no 1290 * `strip_tags()`!). Otherwise it splits the tag into an element and an attribute 1291 * list. 
/**
 * Callback for `wp_kses_split()` for fixing malformed HTML tags.
 *
 * This function does a lot of work. It rejects some very malformed things like
 * `<:::>`. It returns an empty string, if the element isn't allowed (look ma, no
 * `strip_tags()`!). Otherwise it splits the tag into an element and an attribute
 * list.
 *
 * After the tag is split into an element and an attribute list, it is run
 * through another filter which will remove illegal attributes and once that is
 * completed, will be returned.
 *
 * @access private
 * @ignore
 * @since 1.0.0
 * @since 6.6.0 Recognize additional forms of invalid HTML which convert into comments.
 *
 * @param string         $content           Content to filter.
 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
 *                                          for the list of accepted context names.
 * @param string[]       $allowed_protocols Array of allowed URL protocols.
 *
 * @return string Fixed HTML element
 */
function wp_kses_split2( $content, $allowed_html, $allowed_protocols ) {
	$content = wp_kses_stripslashes( $content );

	/*
	 * The regex pattern used to split HTML into chunks attempts
	 * to split on HTML token boundaries. This function should
	 * thus receive chunks that _either_ start with meaningful
	 * syntax tokens, like a tag `<div>` or a comment `<!-- ... -->`.
	 *
	 * If the first character of the `$content` chunk _isn't_ one
	 * of these syntax elements, which always starts with `<`, then
	 * the match had to be for the final alternation of `>`. In such
	 * case, it's probably standing on its own and could be encoded
	 * with a character reference to remove ambiguity.
	 *
	 * In other words, if this chunk isn't from a match of a syntax
	 * token, it's just a plaintext greater-than (`>`) sign.
	 */
	if ( ! str_starts_with( $content, '<' ) ) {
		return '&gt;';
	}

	/*
	 * When certain invalid syntax constructs appear, the HTML parser
	 * shifts into what's called the "bogus comment state." This is a
	 * plaintext state that consumes everything until the nearest `>`
	 * and then transforms the entire span into an HTML comment.
	 *
	 * Preserve these comments and do not treat them like tags.
	 *
	 * @see https://html.spec.whatwg.org/#bogus-comment-state
	 */
	if ( 1 === preg_match( '~^(?:</[^a-zA-Z][^>]*>|<![a-z][^>]*>)$~', $content ) ) {
		/**
		 * Since the pattern matches `</…>` and also `<!…>`, this will
		 * preserve the type of the cleaned-up token in the output.
		 */
		$opener  = $content[1];
		$content = substr( $content, 2, -1 );

		// Filter the inner text repeatedly until another pass changes nothing.
		do {
			$prev    = $content;
			$content = wp_kses( $content, $allowed_html, $allowed_protocols );
		} while ( $prev !== $content );

		// Recombine the modified inner content with the original token structure.
		return "<{$opener}{$content}>";
	}

	/*
	 * Normative HTML comments should be handled separately as their
	 * parsing rules differ from those for tags and text nodes.
	 */
	if ( str_starts_with( $content, '<!--' ) ) {
		$content = str_replace( array( '<!--', '-->' ), '', $content );

		// Filter the comment text repeatedly until another pass changes nothing.
		while ( ( $newstring = wp_kses( $content, $allowed_html, $allowed_protocols ) ) !== $content ) {
			$content = $newstring;
		}

		if ( '' === $content ) {
			return '';
		}

		// Prevent multiple dashes in comments.
		$content = preg_replace( '/--+/', '-', $content );
		// Prevent three dashes closing a comment.
		$content = preg_replace( '/-$/', '', $content );

		return "<!--{$content}-->";
	}

	// It's seriously malformed.
	if ( ! preg_match( '%^<\s*(/\s*)?([a-zA-Z0-9-]+)([^>]*)>?$%', $content, $matches ) ) {
		return '';
	}

	$slash    = trim( $matches[1] );
	$elem     = $matches[2];
	$attrlist = $matches[3];

	if ( ! is_array( $allowed_html ) ) {
		$allowed_html = wp_kses_allowed_html( $allowed_html );
	}

	// They are using a not allowed HTML element.
	if ( ! isset( $allowed_html[ strtolower( $elem ) ] ) ) {
		return '';
	}

	// No attributes are allowed for closing elements.
	if ( '' !== $slash ) {
		return "</$elem>";
	}

	return wp_kses_attr( $elem, $attrlist, $allowed_html, $allowed_protocols );
}

/**
 * Removes all attributes, if none are allowed for this element.
 *
 * If some are allowed it calls `wp_kses_hair()` to split them further, and then
 * it builds up new HTML code from the data that `wp_kses_hair()` returns. It also
 * removes `<` and `>` characters, if there are any left. One more thing it does
 * is to check if the tag has a closing XHTML slash, and if it does, it puts one
 * in the returned code as well.
 *
 * An array of allowed values can be defined for attributes. If the attribute value
 * doesn't fall into the list, the attribute will be removed from the tag.
 *
 * Attributes can be marked as required. If a required attribute is not present,
 * KSES will remove all attributes from the tag. As KSES doesn't match opening and
 * closing tags, it's not possible to safely remove the tag itself, the safest
 * fallback is to strip all attributes from the tag, instead.
 *
 * @since 1.0.0
 * @since 5.9.0 Added support for an array of allowed values for attributes.
 *              Added support for required attributes.
 *
 * @param string         $element           HTML element/tag.
 * @param string         $attr              HTML attributes from HTML element to closing HTML element tag.
 * @param array[]|string $allowed_html      An array of allowed HTML elements and attributes,
 *                                          or a context name such as 'post'. See wp_kses_allowed_html()
 *                                          for the list of accepted context names.
 * @param string[]       $allowed_protocols Array of allowed URL protocols.
 * @return string Sanitized HTML element.
 */
function wp_kses_attr( $element, $attr, $allowed_html, $allowed_protocols ) {
	if ( ! is_array( $allowed_html ) ) {
		$allowed_html = wp_kses_allowed_html( $allowed_html );
	}

	// Is there a closing XHTML slash at the end of the attributes?
	$xhtml_slash = '';
	if ( preg_match( '%\s*/\s*$%', $attr ) ) {
		$xhtml_slash = ' /';
	}

	// Are any attributes allowed at all for this element?
	$element_low = strtolower( $element );
	if ( empty( $allowed_html[ $element_low ] ) || true === $allowed_html[ $element_low ] ) {
		return "<$element$xhtml_slash>";
	}

	// Split it.
	$attrarr = wp_kses_hair( $attr, $allowed_protocols );

	// Check if there are attributes that are required.
	$required_attrs = array_filter(
		$allowed_html[ $element_low ],
		static function ( $required_attr_limits ) {
			return isset( $required_attr_limits['required'] ) && true === $required_attr_limits['required'];
		}
	);

	/*
	 * If a required attribute check fails, we can return nothing for a self-closing tag,
	 * but for a non-self-closing tag the best option is to return the element without attributes,
	 * as KSES doesn't handle matching the relevant closing tag.
	 */
	$stripped_tag = '';
	if ( empty( $xhtml_slash ) ) {
		$stripped_tag = "<$element>";
	}

	// Go through $attrarr, and save the allowed attributes for this element in $attr2.
	$attr2 = '';
	foreach ( $attrarr as $arreach ) {
		// Check if this attribute is required.
		$required = isset( $required_attrs[ strtolower( $arreach['name'] ) ] );

		if ( wp_kses_attr_check( $arreach['name'], $arreach['value'], $arreach['whole'], $arreach['vless'], $element, $allowed_html ) ) {
			$attr2 .= ' ' . $arreach['whole'];

			// If this was a required attribute, we can mark it as found.
			if ( $required ) {
				unset( $required_attrs[ strtolower( $arreach['name'] ) ] );
			}
		} elseif ( $required ) {
			// This attribute was required, but didn't pass the check. The entire tag is not allowed.
			return $stripped_tag;
		}
	}

	// If some required attributes weren't set, the entire tag is not allowed.
	if ( ! empty( $required_attrs ) ) {
		return $stripped_tag;
	}

	// Remove any "<" or ">" characters.
	$attr2 = preg_replace( '/[<>]/', '', $attr2 );

	return "<$element$attr2$xhtml_slash>";
}

/**
 * Determines whether an attribute is allowed.
 *
 * When the attribute is rejected, `$name`, `$value` and `$whole` are all reset
 * to empty strings. For an accepted `style` attribute, `$value` and `$whole`
 * are rewritten with the output of `safecss_filter_attr()`.
 *
 * @since 4.2.3
 * @since 5.0.0 Added support for `data-*` wildcard attributes.
 *
 * @param string $name         The attribute name. Passed by reference. Returns empty string when not allowed.
 * @param string $value        The attribute value. Passed by reference. Returns a filtered value.
 * @param string $whole        The `name=value` input. Passed by reference. Returns filtered input.
 * @param string $vless        Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $element      The name of the element to which this attribute belongs.
 * @param array  $allowed_html The full list of allowed elements and attributes.
 * @return bool Whether or not the attribute is allowed.
 */
function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowed_html ) {
	$name_low    = strtolower( $name );
	$element_low = strtolower( $element );

	if ( ! isset( $allowed_html[ $element_low ] ) ) {
		$name  = '';
		$value = '';
		$whole = '';
		return false;
	}

	$allowed_attr = $allowed_html[ $element_low ];

	if ( ! isset( $allowed_attr[ $name_low ] ) || '' === $allowed_attr[ $name_low ] ) {
		/*
		 * Allow `data-*` attributes.
		 *
		 * When specifying `$allowed_html`, the attribute name should be set as
		 * `data-*` (not to be mixed with the HTML 4.0 `data` attribute, see
		 * https://www.w3.org/TR/html40/struct/objects.html#adef-data).
		 *
		 * Note: the attribute name should only contain `A-Za-z0-9_-` chars.
		 */
		if ( str_starts_with( $name_low, 'data-' ) && ! empty( $allowed_attr['data-*'] )
			&& preg_match( '/^data-[a-z0-9_-]+$/', $name_low, $match )
		) {
			/*
			 * Add the whole attribute name to the allowed attributes and set any restrictions
			 * for the `data-*` attribute values for the current element.
			 */
			$allowed_attr[ $match[0] ] = $allowed_attr['data-*'];
		} else {
			$name  = '';
			$value = '';
			$whole = '';
			return false;
		}
	}

	if ( 'style' === $name_low ) {
		$new_value = safecss_filter_attr( $value );

		if ( empty( $new_value ) ) {
			$name  = '';
			$value = '';
			$whole = '';
			return false;
		}

		$whole = str_replace( $value, $new_value, $whole );
		$value = $new_value;
	}

	if ( is_array( $allowed_attr[ $name_low ] ) ) {
		// There are some checks.
		foreach ( $allowed_attr[ $name_low ] as $currkey => $currval ) {
			if ( ! wp_kses_check_attr_val( $value, $vless, $currkey, $currval ) ) {
				$name  = '';
				$value = '';
				$whole = '';
				return false;
			}
		}
	}

	return true;
}

/**
 * Given a string of HTML attributes and values, parse into a structured attribute list.
 *
 * This function performs a number of transformations while parsing attribute strings:
 *  - It normalizes attribute values and surrounds them with double quotes.
 *  - It normalizes HTML character references inside attribute values.
 *  - It removes “bad” URL protocols from attribute values.
 *
 * Otherwise this reads the attributes as if they were part of an HTML tag. It performs
 * these transformations to lower the risk of mis-parsing down the line and to perform
 * URL sanitization in line with the rest of the `kses` subsystem.
 */
/**
 * Given a string of HTML attributes and values, parse into a structured attribute list.
 *
 * This function performs a number of transformations while parsing attribute strings:
 *  - It normalizes attribute values and surrounds them with double quotes.
 *  - It normalizes HTML character references inside attribute values.
 *  - It removes “bad” URL protocols from attribute values.
 *
 * Otherwise this reads the attributes as if they were part of an HTML tag. It performs
 * these transformations to lower the risk of mis-parsing down the line and to perform
 * URL sanitization in line with the rest of the `kses` subsystem. Importantly, it does
 * not decode the attribute values, meaning that special HTML syntax characters will
 * be left with character references in the `value` property.
 *
 * Example:
 *
 *     $attrs = wp_kses_hair( 'class="is-wide" inert data-lazy=\'&lt;img&gt;\' =/🐮=/', array() );
 *     $attrs === array(
 *         'class'     => array( 'name' => 'class', 'value' => 'is-wide', 'whole' => 'class="is-wide"', 'vless' => 'n' ),
 *         'inert'     => array( 'name' => 'inert', 'value' => '', 'whole' => 'inert', 'vless' => 'y' ),
 *         'data-lazy' => array( 'name' => 'data-lazy', 'value' => '&lt;img&gt;', 'whole' => 'data-lazy="&lt;img&gt;"', 'vless' => 'n' ),
 *         '='         => array( 'name' => '=', 'value' => '', 'whole' => '=', 'vless' => 'y' ),
 *         '🐮'        => array( 'name' => '🐮', 'value' => '/', 'whole' => '🐮="/"', 'vless' => 'n' ),
 *     );
 *
 * @since 1.0.0
 * @since 7.0.0 Reliably parses HTML via the HTML API.
 *
 * @param string   $attr              Attribute list from HTML element to closing HTML element tag.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @return array<string, array{name: string, value: string, whole: string, vless: 'y'|'n'}> Array of attribute information after parsing.
 */
function wp_kses_hair( $attr, $allowed_protocols ) {
	$attributes = array();
	$uris       = wp_kses_uri_attributes();

	// Wrap the attribute list in a placeholder tag so the HTML API can parse it.
	$processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" );
	$processor->next_token();

	// HTML syntax characters are re-encoded as character references in the output.
	$syntax_characters = array(
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		"'" => '&apos;',
		'"' => '&quot;',
	);

	foreach ( $processor->get_attribute_names_with_prefix( '' ) as $name ) {
		$value   = $processor->get_attribute( $name );
		$is_bool = true === $value;
		if ( is_string( $value ) && in_array( $name, $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		// Reconstruct and normalize the attribute value.
		$recoded = $is_bool ? '' : strtr( $value, $syntax_characters );
		$whole   = $is_bool ? $name : "{$name}=\"{$recoded}\"";

		$attributes[ $name ] = array(
			'name'  => $name,
			'value' => $recoded,
			'whole' => $whole,
			'vless' => $is_bool ? 'y' : 'n',
		);
	}

	return $attributes;
}

/**
 * Finds all attributes of an HTML element.
 *
 * Does not modify input. May return "evil" output.
 *
 * The returned list starts with the opening of the tag (`<` plus the element
 * name), continues with one entry per attribute as produced by
 * `wp_kses_hair_parse()`, and ends with the closing of the tag (optional XHTML
 * slash plus optional `>`).
 *
 * Based on `wp_kses_split2()` and `wp_kses_attr()`.
 *
 * @since 4.2.3
 *
 * @param string $element HTML element.
 * @return array|false List of attributes found in the element. Returns false on failure.
 */
function wp_kses_attr_parse( $element ) {
	$valid = preg_match( '%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches );
	if ( 1 !== $valid ) {
		return false;
	}

	$begin  = $matches[1];
	$slash  = $matches[2];
	$elname = $matches[3];
	$attr   = $matches[4];
	$end    = $matches[5];

	if ( '' !== $slash ) {
		// Closing elements do not get parsed.
		return false;
	}

	// Is there a closing XHTML slash at the end of the attributes?
	if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) {
		$xhtml_slash = $matches[0];
		$attr        = substr( $attr, 0, -strlen( $xhtml_slash ) );
	} else {
		$xhtml_slash = '';
	}

	// Split it.
	$attrarr = wp_kses_hair_parse( $attr );
	if ( false === $attrarr ) {
		return false;
	}

	// Make sure all input is returned by adding front and back matter.
	array_unshift( $attrarr, $begin . $slash . $elname );
	array_push( $attrarr, $xhtml_slash . $end );

	return $attrarr;
}

/**
 * Builds an attribute list from string containing attributes.
 *
 * Does not modify input. May return "evil" output.
 * In case of unexpected input, returns false instead of stripping things.
 */
/**
 * Builds an attribute list from string containing attributes.
 *
 * Does not modify input. May return "evil" output.
 * In case of unexpected input, returns false instead of stripping things.
 *
 * Based on `wp_kses_hair()` but does not return a multi-dimensional array.
 *
 * @since 4.2.3
 *
 * @param string $attr Attribute list from HTML element to closing HTML element tag.
 * @return array|false List of attributes found in $attr. Returns false on failure.
 */
function wp_kses_hair_parse( $attr ) {
	if ( '' === $attr ) {
		return array();
	}

	/*
	 * This fragment is only valid under the `x` modifier: whitespace is ignored
	 * and each `#` comment runs to the end of its line, so the line breaks are
	 * significant and must be kept.
	 */
	$regex =
		'(?:
				[_a-zA-Z][-_a-zA-Z0-9:.]* # Attribute name.
			|
				\[\[?[^\[\]]+\]\]?        # Shortcode in the name position implies unfiltered_html.
		)
		(?:                               # Attribute value.
			\s*=\s*                       # All values begin with "=".
			(?:
				"[^"]*"                   # Double-quoted.
			|
				\'[^\']*\'                # Single-quoted.
			|
				[^\s"\']+                 # Non-quoted.
				(?:\s|$)                  # Must have a space.
			)
		|
			(?:\s|$)                      # If attribute has no value, space is required.
		)
		\s*                               # Trailing space is optional except as mentioned above.
		';

	/*
	 * Although it is possible to reduce this procedure to a single regexp,
	 * we must run that regexp twice to get exactly the expected result.
	 *
	 * Note: do NOT remove the `x` modifiers as they are essential for the above regex!
	 */

	$validation = "/^($regex)+$/x";
	$extraction = "/$regex/x";

	// Only extract when the entire string consists of well-formed attributes.
	if ( 1 === preg_match( $validation, $attr ) ) {
		preg_match_all( $extraction, $attr, $attrarr );
		return $attrarr[0];
	} else {
		return false;
	}
}

/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval",
 * and "valueless".
 *
 * @since 1.0.0
 *
 * @param string $value      Attribute value.
 * @param string $vless      Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $checkname  What $checkvalue is checking for.
 * @param mixed  $checkvalue What constraint the value should pass.
 * @return bool Whether check passes.
 */
/**
 * Performs different checks for attribute values.
 *
 * The currently implemented checks are "maxlen", "minlen", "maxval", "minval",
 * "valueless", "values", and "value_callback". An unrecognized check name
 * always passes.
 *
 * @since 1.0.0
 *
 * @param string $value      Attribute value.
 * @param string $vless      Whether the attribute is valueless. Use 'y' or 'n'.
 * @param string $checkname  What $checkvalue is checking for.
 * @param mixed  $checkvalue What constraint the value should pass.
 * @return bool Whether check passes.
 */
function wp_kses_check_attr_val( $value, $vless, $checkname, $checkvalue ) {
	$ok = true;

	switch ( strtolower( $checkname ) ) {
		case 'maxlen':
			/*
			 * The maxlen check makes sure that the attribute value has a length not
			 * greater than the given value. This can be used to avoid Buffer Overflows
			 * in WWW clients and various Internet servers.
			 */

			if ( strlen( $value ) > $checkvalue ) {
				$ok = false;
			}
			break;

		case 'minlen':
			/*
			 * The minlen check makes sure that the attribute value has a length not
			 * smaller than the given value.
			 */

			if ( strlen( $value ) < $checkvalue ) {
				$ok = false;
			}
			break;

		case 'maxval':
			/*
			 * The maxval check does two things: it checks that the attribute value is
			 * an integer from 0 and up, without an excessive amount of zeroes or
			 * whitespace (to avoid Buffer Overflows). It also checks that the attribute
			 * value is not greater than the given value.
			 * This check can be used to avoid Denial of Service attacks.
			 */

			if ( ! preg_match( '/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value ) ) {
				$ok = false;
			}
			if ( $value > $checkvalue ) {
				$ok = false;
			}
			break;

		case 'minval':
			/*
			 * The minval check makes sure that the attribute value is a positive integer,
			 * and that it is not smaller than the given value.
			 */

			if ( ! preg_match( '/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value ) ) {
				$ok = false;
			}
			if ( $value < $checkvalue ) {
				$ok = false;
			}
			break;

		case 'valueless':
			/*
			 * The valueless check makes sure if the attribute has a value
			 * (like `<a href="blah">`) or not (`<option selected>`). If the given value
			 * is a "y" or a "Y", the attribute must not have a value.
			 * If the given value is an "n" or an "N", the attribute must have a value.
			 */

			if ( strtolower( $checkvalue ) !== $vless ) {
				$ok = false;
			}
			break;

		case 'values':
			/*
			 * The values check is used when you want to make sure that the attribute
			 * has one of the given values.
			 */

			if ( false === array_search( strtolower( $value ), $checkvalue, true ) ) {
				$ok = false;
			}
			break;

		case 'value_callback':
			/*
			 * The value_callback check is used when you want to make sure that the attribute
			 * value is accepted by the callback function.
			 */

			if ( ! call_user_func( $checkvalue, $value ) ) {
				$ok = false;
			}
			break;
	} // End switch.

	return $ok;
}

/**
 * Sanitizes a string and removes disallowed URL protocols.
 *
 * This function removes all non-allowed protocols from the beginning of the
 * string. It ignores whitespace and the case of the letters, and it does
 * understand HTML entities. It does its work repeatedly, so it won't be
 * fooled by a string like `javascript:javascript:alert(57)`.
 *
 * If the string is still changing after six passes, an empty string is returned.
 *
 * @since 1.0.0
 *
 * @param string   $content           Content to filter bad protocols from.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @return string Filtered content.
 */
function wp_kses_bad_protocol( $content, $allowed_protocols ) {
	$content = wp_kses_no_null( $content );

	// Short-circuit if the string starts with `https://` or `http://`. Most common cases.
	if (
		( str_starts_with( $content, 'https://' ) && in_array( 'https', $allowed_protocols, true ) ) ||
		( str_starts_with( $content, 'http://' ) && in_array( 'http', $allowed_protocols, true ) )
	) {
		return $content;
	}

	$iterations = 0;

	// Strip one protocol at a time until the string is stable or the pass limit is hit.
	do {
		$original_content = $content;
		$content          = wp_kses_bad_protocol_once( $content, $allowed_protocols );
	} while ( $original_content !== $content && ++$iterations < 6 );

	// Still changing after the last pass: give up and reject the whole string.
	if ( $original_content !== $content ) {
		return '';
	}

	return $content;
}

/**
 * Removes any invalid control characters in a text string.
 *
 * Also removes any instance of the `\0` string.
 *
 * @since 1.0.0
 *
 * @param string $content Content to filter null characters from.
 * @param array  $options Set 'slash_zero' => 'keep' when '\0' is allowed. Default is 'remove'.
 * @return string Filtered content.
 */
function wp_kses_no_null( $content, $options = null ) {
	if ( ! isset( $options['slash_zero'] ) ) {
		$options = array( 'slash_zero' => 'remove' );
	}

	// Control characters other than tab, line feed and carriage return.
	$content = preg_replace( '/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $content );
	if ( 'remove' === $options['slash_zero'] ) {
		// One or more backslashes followed by one or more zeros.
		$content = preg_replace( '/\\\\+0+/', '', $content );
	}

	return $content;
}

/**
 * Strips slashes from in front of quotes.
 *
 * This function changes the character sequence `\"` to just `"`. It leaves all other
 * slashes alone. The quoting from `preg_replace(//e)` requires this.
 *
 * @since 1.0.0
 *
 * @param string $content String to strip slashes from.
 * @return string Fixed string with quoted slashes.
 */
function wp_kses_stripslashes( $content ) {
	return preg_replace( '%\\\\"%', '"', $content );
}

/**
 * Converts the keys of an array to lowercase.
 */
/**
 * Converts the keys of an array to lowercase.
 *
 * Both the outer keys and the keys of each nested value are lowercased; every
 * outer value is cast to an array first.
 *
 * @since 1.0.0
 *
 * @param array $inarray Unfiltered array.
 * @return array Fixed array with all lowercase keys.
 */
function wp_kses_array_lc( $inarray ) {
	$outarray = array();

	foreach ( (array) $inarray as $inkey => $inval ) {
		$outkey              = strtolower( $inkey );
		$outarray[ $outkey ] = array();

		foreach ( (array) $inval as $inkey2 => $inval2 ) {
			$outkey2                         = strtolower( $inkey2 );
			$outarray[ $outkey ][ $outkey2 ] = $inval2;
		}
	}

	return $outarray;
}

/**
 * Handles parsing errors in `wp_kses_hair()`.
 *
 * The general plan is to remove everything to and including some whitespace,
 * but it deals with quotes and apostrophes as well.
 *
 * @since 1.0.0
 *
 * @param string $attr
 * @return string
 */
function wp_kses_html_error( $attr ) {
	return preg_replace( '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $attr );
}

/**
 * Sanitizes content from bad protocols and other characters.
 *
 * This function searches for URL protocols at the beginning of the string, while
 * handling whitespace and HTML entities.
 *
 * @since 1.0.0
 *
 * @param string   $content           Content to check for bad protocols.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @param int      $count             Depth of call recursion to this function.
 * @return string Sanitized content.
 */
function wp_kses_bad_protocol_once( $content, $allowed_protocols, $count = 1 ) {
	// Terminate numeric colon references (`&#58`, `&#x3a`) that lack their trailing semicolon.
	$content = preg_replace( '/(&#0*58(?![;0-9])|&#x0*3a(?![;a-f0-9]))/i', '$1;', $content );

	// Split at the first colon, whether literal or written as a character reference.
	$content2 = preg_split( '/:|&#0*58;|&#x0*3a;|&colon;/i', $content, 2 );

	// A `/?` before the colon means the colon does not terminate a scheme.
	if ( isset( $content2[1] ) && ! preg_match( '%/\?%', $content2[0] ) ) {
		$content  = trim( $content2[1] );
		$protocol = wp_kses_bad_protocol_once2( $content2[0], $allowed_protocols );
		if ( 'feed:' === $protocol ) {
			if ( $count > 2 ) {
				return '';
			}
			// `feed:` may wrap another protocol, so check what follows it too.
			$content = wp_kses_bad_protocol_once( $content, $allowed_protocols, ++$count );
			if ( empty( $content ) ) {
				return $content;
			}
		}
		$content = $protocol . $content;
	}

	return $content;
}

/**
 * Callback for `wp_kses_bad_protocol_once()` regular expression.
 *
 * This function processes URL protocols, checks to see if they're in the
 * list of allowed protocols or not, and returns different data depending
 * on the answer.
 *
 * @access private
 * @ignore
 * @since 1.0.0
 *
 * @param string   $scheme            URI scheme to check against the list of allowed protocols.
 * @param string[] $allowed_protocols Array of allowed URL protocols.
 * @return string The normalized scheme followed by a colon when allowed, otherwise an empty string.
 */
function wp_kses_bad_protocol_once2( $scheme, $allowed_protocols ) {
	$scheme = wp_kses_decode_entities( $scheme );
	$scheme = preg_replace( '/\s/', '', $scheme );
	$scheme = wp_kses_no_null( $scheme );
	$scheme = strtolower( $scheme );

	$allowed = false;
	foreach ( (array) $allowed_protocols as $one_protocol ) {
		if ( strtolower( $one_protocol ) === $scheme ) {
			$allowed = true;
			break;
		}
	}

	if ( $allowed ) {
		return "$scheme:";
	} else {
		return '';
	}
}

/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert `AT&T` to the correct
 * `AT&amp;T`, `&#00058;` to `&#058;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on.
 *
 * When `$context` is set to 'xml', HTML entities are converted to their code points. For
 * example, `AT&T&hellip;&#XYZZY;` is converted to `AT&amp;T…&amp;#XYZZY;`.
 */
/**
 * Converts and fixes HTML entities.
 *
 * This function normalizes HTML entities. It will convert `AT&T` to the correct
 * `AT&amp;T`, `&#00058;` to `&#058;`, `&#XYZZY;` to `&amp;#XYZZY;` and so on.
 *
 * When `$context` is set to 'xml', HTML entities are converted to their code points. For
 * example, `AT&T&hellip;&#XYZZY;` is converted to `AT&amp;T…&amp;#XYZZY;`.
 *
 * @since 1.0.0
 * @since 5.5.0 Added `$context` parameter.
 *
 * @param string $content Content to normalize entities.
 * @param string $context Context for normalization. Can be either 'html' or 'xml'.
 *                        Default 'html'.
 * @return string Content with normalized entities.
 */
function wp_kses_normalize_entities( $content, $context = 'html' ) {
	// Disarm all entities by converting & to &amp;
	$content = str_replace( '&', '&amp;', $content );

	/*
	 * Decode any character references that are now double-encoded.
	 *
	 * It's important that the following normalizations happen in the correct order.
	 *
	 * At this point, all `&` have been transformed to `&amp;`. Double-encoded named character
	 * references like `&amp;amp;` will be decoded back to their single-encoded form `&amp;`.
	 *
	 * First, numeric (decimal and hexadecimal) character references must be handled so that
	 * `&amp;#09;` becomes `&#009;`. If the named character references were handled first, there
	 * would be no way to know whether the double-encoded character reference had been produced
	 * in this function or was the original input.
	 *
	 * Consider the two examples, first with named entity decoding followed by numeric
	 * entity decoding. We'll use U+002E FULL STOP (.) in our example, this table follows the
	 * string processing from left to right:
	 *
	 * | Input        | &-encoded        | Named ref double-decoded | Numeric ref double-decoded |
	 * | ------------ | ---------------- | ------------------------ | -------------------------- |
	 * | `&#x2E;`     | `&amp;#x2E;`     | `&amp;#x2E;`             | `&#x2E;`                   |
	 * | `&amp;#x2E;` | `&amp;amp;#x2E;` | `&amp;#x2E;`             | `&#x2E;`                   |
	 *
	 * Notice in the example above that different inputs result in the same result. The second case
	 * was not normalized and produced HTML that is semantically different from the input.
	 *
	 * | Input        | &-encoded        | Numeric ref double-decoded | Named ref double-decoded |
	 * | ------------ | ---------------- | -------------------------- | ------------------------ |
	 * | `&#x2E;`     | `&amp;#x2E;`     | `&#x2E;`                   | `&#x2E;`                 |
	 * | `&amp;#x2E;` | `&amp;amp;#x2E;` | `&amp;amp;#x2E;`           | `&amp;#x2E;`             |
	 *
	 * Here, each input is normalized to an appropriate output.
	 */
	$content = preg_replace_callback( '/&amp;#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $content );
	$content = preg_replace_callback( '/&amp;#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $content );
	if ( 'xml' === $context ) {
		$content = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_xml_named_entities', $content );
	} else {
		$content = preg_replace_callback( '/&amp;([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $content );
	}

	return $content;
}

/**
 * Callback for `wp_kses_normalize_entities()` regular expression.
 *
 * This function only accepts valid named entity references, which are finite,
 * case-sensitive, and highly scrutinized by HTML and XML validators.
 *
 * A name found in `$allowedentitynames` is restored to `&name;`; any other name
 * keeps its ampersand encoded as `&amp;name;`.
 *
 * @since 3.0.0
 *
 * @global array $allowedentitynames
 *
 * @param array $matches preg_replace_callback() matches array.
 * @return string Correctly encoded entity.
 */
function wp_kses_named_entities( $matches ) {
	global $allowedentitynames;

	if ( empty( $matches[1] ) ) {
		return '';
	}

	$i = $matches[1];
	return ( ! in_array( $i, $allowedentitynames, true ) ) ? "&amp;$i;" : "&$i;";
}

/**
 * Callback for `wp_kses_normalize_entities()` regular expression.
 *
 * This function only accepts valid named entity references, which are finite,
 * case-sensitive, and highly scrutinized by XML validators. HTML named entity
 * references are converted to their code points.
 *
 * @since 5.5.0
 *
 * @global array $allowedentitynames
 * @global array $allowedxmlentitynames
 *
 * @param array $matches preg_replace_callback() matches array.
 * @return string Correctly encoded entity.
 */
function wp_kses_xml_named_entities( $matches ) {
	global $allowedentitynames, $allowedxmlentitynames;

	if ( empty( $matches[1] ) ) {
		return '';
	}

	$i = $matches[1];

	if ( in_array( $i, $allowedxmlentitynames, true ) ) {
		// Names that XML itself understands are kept as references.
		return "&$i;";
	} elseif ( in_array( $i, $allowedentitynames, true ) ) {
		// HTML-only names are replaced by the character they stand for.
		return html_entity_decode( "&$i;", ENT_HTML5 );
	}

	return "&amp;$i;";
}

/**
 * Callback for `wp_kses_normalize_entities()` regular expression.
 *
 * This function helps `wp_kses_normalize_entities()` to only accept decimal
 * `&#number;` references whose code point passes `valid_unicode()`. Accepted
 * numbers are stripped of leading zeros and left-padded to at least three
 * digits; rejected ones keep their ampersand encoded as `&amp;`.
 *
 * Note that a captured number of exactly `0` counts as empty, so `&#0;` is
 * removed entirely.
 *
 * @access private
 * @ignore
 * @since 1.0.0
 *
 * @param array $matches `preg_replace_callback()` matches array.
 * @return string Correctly encoded entity.
 */
function wp_kses_normalize_entities2( $matches ) {
	if ( empty( $matches[1] ) ) {
		return '';
	}

	$i = $matches[1];

	if ( valid_unicode( $i ) ) {
		$i = str_pad( ltrim( $i, '0' ), 3, '0', STR_PAD_LEFT );
		$i = "&#$i;";
	} else {
		$i = "&amp;#$i;";
	}

	return $i;
}

/**
 * Callback for `wp_kses_normalize_entities()` for regular expression.
 *
 * This function helps `wp_kses_normalize_entities()` to only accept valid Unicode
 * numeric entities in hex form. Accepted references are stripped of leading
 * zeros; rejected ones keep their ampersand encoded as `&amp;`.
 *
 * @since 2.7.0
 * @access private
 * @ignore
 *
 * @param array $matches `preg_replace_callback()` matches array.
 * @return string Correctly encoded entity.
 */
function wp_kses_normalize_entities3( $matches ) {
	if ( empty( $matches[1] ) ) {
		return '';
	}

	$hexchars = $matches[1];

	return ( ! valid_unicode( hexdec( $hexchars ) ) ) ? "&amp;#x$hexchars;" : '&#x' . ltrim( $hexchars, '0' ) . ';';
}

/**
 * Determines if a Unicode codepoint is valid.
 *
 * The definition of a valid Unicode codepoint is taken from the XML definition:
 *
 * > Characters
 * >
 * > …
 * > Legal characters are tab, carriage return, line feed, and the legal characters of
 * > Unicode and ISO/IEC 10646.
 * > …
 * > Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 *
 * @since 2.7.0
 *
 * @see https://www.w3.org/TR/xml/#charsets
 *
 * @param int $i Unicode codepoint.
 * @return bool Whether or not the codepoint is a valid Unicode codepoint.
 */
function valid_unicode( $i ) {
	$i = (int) $i;

	return (
		0x9 === $i || // U+0009 HORIZONTAL TABULATION (HT)
		0xA === $i || // U+000A LINE FEED (LF)
		0xD === $i || // U+000D CARRIAGE RETURN (CR)
		/*
		 * The valid Unicode characters according to the XML specification:
		 *
		 * > any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
		 */
		( 0x20 <= $i && $i <= 0xD7FF ) ||
		( 0xE000 <= $i && $i <= 0xFFFD ) ||
		( 0x10000 <= $i && $i <= 0x10FFFF )
	);
}

/**
 * Converts all numeric HTML entities to their named counterparts.
 *
 * This function decodes numeric HTML entities (`&#65;` and `&#x41;`).
 * It doesn't do anything with named entities like `&auml;`, but we don't
 * need them in the allowed URL protocols system anyway.
 *
 * @since 1.0.0
 *
 * @param string $content Content to change entities.
 * @return string Content after decoded entities.
 */
2286 */ 2287 function wp_kses_decode_entities( $content ) { 2288 $content = preg_replace_callback( '/&#([0-9]+);/', '_wp_kses_decode_entities_chr', $content ); 2289 $content = preg_replace_callback( '/&#[Xx]([0-9A-Fa-f]+);/', '_wp_kses_decode_entities_chr_hexdec', $content ); 2290 2291 return $content; 2292 } 2293 2294 /** 2295 * Regex callback for `wp_kses_decode_entities()`. 2296 * 2297 * @since 2.9.0 2298 * @access private 2299 * @ignore 2300 * 2301 * @param array $matches preg match 2302 * @return string 2303 */ 2304 function _wp_kses_decode_entities_chr( $matches ) { 2305 return chr( $matches[1] ); 2306 } 2307 2308 /** 2309 * Regex callback for `wp_kses_decode_entities()`. 2310 * 2311 * @since 2.9.0 2312 * @access private 2313 * @ignore 2314 * 2315 * @param array $matches preg match 2316 * @return string 2317 */ 2318 function _wp_kses_decode_entities_chr_hexdec( $matches ) { 2319 return chr( hexdec( $matches[1] ) ); 2320 } 2321 2322 /** 2323 * Sanitize content with allowed HTML KSES rules. 2324 * 2325 * This function expects slashed data. 2326 * 2327 * @since 1.0.0 2328 * 2329 * @param string $data Content to filter, expected to be escaped with slashes. 2330 * @return string Filtered content. 2331 */ 2332 function wp_filter_kses( $data ) { 2333 return addslashes( wp_kses( stripslashes( $data ), current_filter() ) ); 2334 } 2335 2336 /** 2337 * Sanitize content with allowed HTML KSES rules. 2338 * 2339 * This function expects unslashed data. 2340 * 2341 * @since 2.9.0 2342 * 2343 * @param string $data Content to filter, expected to not be escaped. 2344 * @return string Filtered content. 2345 */ 2346 function wp_kses_data( $data ) { 2347 return wp_kses( $data, current_filter() ); 2348 } 2349 2350 /** 2351 * Sanitizes content for allowed HTML tags for post content. 2352 * 2353 * Post content refers to the page contents of the 'post' type and not `$_POST` 2354 * data from forms. 2355 * 2356 * This function expects slashed data. 
2357 * 2358 * @since 2.0.0 2359 * 2360 * @param string $data Post content to filter, expected to be escaped with slashes. 2361 * @return string Filtered post content with allowed HTML tags and attributes intact. 2362 */ 2363 function wp_filter_post_kses( $data ) { 2364 return addslashes( wp_kses( stripslashes( $data ), 'post' ) ); 2365 } 2366 2367 /** 2368 * Sanitizes global styles user content removing unsafe rules. 2369 * 2370 * @since 5.9.0 2371 * 2372 * @param string $data Post content to filter. 2373 * @return string Filtered post content with unsafe rules removed. 2374 */ 2375 function wp_filter_global_styles_post( $data ) { 2376 $decoded_data = json_decode( wp_unslash( $data ), true ); 2377 $json_decoding_error = json_last_error(); 2378 if ( 2379 JSON_ERROR_NONE === $json_decoding_error && 2380 is_array( $decoded_data ) && 2381 isset( $decoded_data['isGlobalStylesUserThemeJSON'] ) && 2382 $decoded_data['isGlobalStylesUserThemeJSON'] 2383 ) { 2384 unset( $decoded_data['isGlobalStylesUserThemeJSON'] ); 2385 2386 $data_to_encode = WP_Theme_JSON::remove_insecure_properties( $decoded_data, 'custom' ); 2387 2388 $data_to_encode['isGlobalStylesUserThemeJSON'] = true; 2389 return wp_slash( wp_json_encode( $data_to_encode ) ); 2390 } 2391 return $data; 2392 } 2393 2394 /** 2395 * Sanitizes content for allowed HTML tags for post content. 2396 * 2397 * Post content refers to the page contents of the 'post' type and not `$_POST` 2398 * data from forms. 2399 * 2400 * This function expects unslashed data. 2401 * 2402 * @since 2.9.0 2403 * 2404 * @param string $data Post content to filter. 2405 * @return string Filtered post content with allowed HTML tags and attributes intact. 2406 */ 2407 function wp_kses_post( $data ) { 2408 return wp_kses( $data, 'post' ); 2409 } 2410 2411 /** 2412 * Navigates through an array, object, or scalar, and sanitizes content for 2413 * allowed HTML tags for post content. 
2414 * 2415 * @since 4.4.2 2416 * 2417 * @see map_deep() 2418 * 2419 * @param mixed $data The array, object, or scalar value to inspect. 2420 * @return mixed The filtered content. 2421 */ 2422 function wp_kses_post_deep( $data ) { 2423 return map_deep( $data, 'wp_kses_post' ); 2424 } 2425 2426 /** 2427 * Strips all HTML from a text string. 2428 * 2429 * This function expects slashed data. 2430 * 2431 * @since 2.1.0 2432 * 2433 * @param string $data Content to strip all HTML from. 2434 * @return string Filtered content without any HTML. 2435 */ 2436 function wp_filter_nohtml_kses( $data ) { 2437 return addslashes( wp_kses( stripslashes( $data ), 'strip' ) ); 2438 } 2439 2440 /** 2441 * Adds all KSES input form content filters. 2442 * 2443 * All hooks have default priority. The `wp_filter_kses()` function is added to 2444 * the 'pre_comment_content' and 'title_save_pre' hooks. 2445 * 2446 * The `wp_filter_post_kses()` function is added to the 'content_save_pre', 2447 * 'excerpt_save_pre', and 'content_filtered_save_pre' hooks. 2448 * 2449 * @since 2.0.0 2450 */ 2451 function kses_init_filters() { 2452 // Normal filtering. 2453 add_filter( 'title_save_pre', 'wp_filter_kses' ); 2454 2455 // Comment filtering. 2456 if ( current_user_can( 'unfiltered_html' ) ) { 2457 add_filter( 'pre_comment_content', 'wp_filter_post_kses' ); 2458 } else { 2459 add_filter( 'pre_comment_content', 'wp_filter_kses' ); 2460 } 2461 2462 // Global Styles filtering: Global Styles filters should be executed before normal post_kses HTML filters. 2463 add_filter( 'content_save_pre', 'wp_filter_global_styles_post', 9 ); 2464 add_filter( 'content_filtered_save_pre', 'wp_filter_global_styles_post', 9 ); 2465 2466 // Post filtering. 2467 add_filter( 'content_save_pre', 'wp_filter_post_kses' ); 2468 add_filter( 'excerpt_save_pre', 'wp_filter_post_kses' ); 2469 add_filter( 'content_filtered_save_pre', 'wp_filter_post_kses' ); 2470 } 2471 2472 /** 2473 * Removes all KSES input form content filters. 
2474 * 2475 * A quick procedural method to removing all of the filters that KSES uses for 2476 * content in WordPress Loop. 2477 * 2478 * Does not remove the `kses_init()` function from {@see 'init'} hook (priority is 2479 * default). Also does not remove `kses_init()` function from {@see 'set_current_user'} 2480 * hook (priority is also default). 2481 * 2482 * @since 2.0.6 2483 */ 2484 function kses_remove_filters() { 2485 // Normal filtering. 2486 remove_filter( 'title_save_pre', 'wp_filter_kses' ); 2487 2488 // Comment filtering. 2489 remove_filter( 'pre_comment_content', 'wp_filter_post_kses' ); 2490 remove_filter( 'pre_comment_content', 'wp_filter_kses' ); 2491 2492 // Global Styles filtering. 2493 remove_filter( 'content_save_pre', 'wp_filter_global_styles_post', 9 ); 2494 remove_filter( 'content_filtered_save_pre', 'wp_filter_global_styles_post', 9 ); 2495 2496 // Post filtering. 2497 remove_filter( 'content_save_pre', 'wp_filter_post_kses' ); 2498 remove_filter( 'excerpt_save_pre', 'wp_filter_post_kses' ); 2499 remove_filter( 'content_filtered_save_pre', 'wp_filter_post_kses' ); 2500 } 2501 2502 /** 2503 * Sets up most of the KSES filters for input form content. 2504 * 2505 * First removes all of the KSES filters in case the current user does not need 2506 * to have KSES filter the content. If the user does not have `unfiltered_html` 2507 * capability, then KSES filters are added. 2508 * 2509 * @since 2.0.0 2510 */ 2511 function kses_init() { 2512 kses_remove_filters(); 2513 2514 if ( ! current_user_can( 'unfiltered_html' ) ) { 2515 kses_init_filters(); 2516 } 2517 } 2518 2519 /** 2520 * Filters an inline style attribute and removes disallowed rules. 2521 * 2522 * @since 2.8.1 2523 * @since 4.4.0 Added support for `min-height`, `max-height`, `min-width`, and `max-width`. 2524 * @since 4.6.0 Added support for `list-style-type`. 2525 * @since 5.0.0 Added support for `background-image`. 2526 * @since 5.1.0 Added support for `text-transform`. 
 * @since 5.2.0 Added support for `background-position` and `grid-template-columns`.
 * @since 5.3.0 Added support for `grid`, `flex` and `column` layout properties.
 *              Extended `background-*` support for individual properties.
 * @since 5.3.1 Added support for gradient backgrounds.
 * @since 5.7.1 Added support for `object-position`.
 * @since 5.8.0 Added support for `calc()` and `var()` values.
 * @since 6.1.0 Added support for `min()`, `max()`, `minmax()`, `clamp()`,
 *              nested `var()` values, and assigning values to CSS variables.
 *              Added support for `object-fit`, `gap`, `column-gap`, `row-gap`, and `flex-wrap`.
 *              Extended `margin-*` and `padding-*` support for logical properties.
 * @since 6.2.0 Added support for `aspect-ratio`, `position`, `top`, `right`, `bottom`, `left`,
 *              and `z-index` CSS properties.
 * @since 6.3.0 Extended support for `filter` to accept a URL and added support for repeat().
 *              Added support for `box-shadow`.
 * @since 6.4.0 Added support for `writing-mode`.
 * @since 6.5.0 Added support for `background-repeat`.
 * @since 6.6.0 Added support for `grid-column`, `grid-row`, and `container-type`.
 * @since 6.9.0 Added support for `white-space`.
 *
 * @param string $css        A string of CSS rules, split on `;` into individual declarations.
 * @param string $deprecated Not used.
 * @return string Filtered string of CSS rules: the declarations that passed every check,
 *                joined with `;`.
 */
function safecss_filter_attr( $css, $deprecated = '' ) {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '2.8.1' ); // Never implemented.
	}

	// Remove line feeds, carriage returns, and tabs before splitting into declarations.
	$css = wp_kses_no_null( $css );
	$css = str_replace( array( "\n", "\r", "\t" ), '', $css );

	$allowed_protocols = wp_allowed_protocols();

	$css_array = explode( ';', trim( $css ) );

	/**
	 * Filters the list of allowed CSS attributes.
	 *
	 * @since 2.8.1
	 *
	 * @param string[] $attr Array of allowed CSS attributes.
	 */
	$allowed_attr = apply_filters(
		'safe_style_css',
		array(
			'background',
			'background-color',
			'background-image',
			'background-position',
			'background-repeat',
			'background-size',
			'background-attachment',
			'background-blend-mode',

			'border',
			'border-radius',
			'border-width',
			'border-color',
			'border-style',
			'border-right',
			'border-right-color',
			'border-right-style',
			'border-right-width',
			'border-bottom',
			'border-bottom-color',
			'border-bottom-left-radius',
			'border-bottom-right-radius',
			'border-bottom-style',
			'border-bottom-width',
			'border-bottom-right-radius',
			'border-bottom-left-radius',
			'border-left',
			'border-left-color',
			'border-left-style',
			'border-left-width',
			'border-top',
			'border-top-color',
			'border-top-left-radius',
			'border-top-right-radius',
			'border-top-style',
			'border-top-width',
			'border-top-left-radius',
			'border-top-right-radius',

			'border-spacing',
			'border-collapse',
			'caption-side',

			'columns',
			'column-count',
			'column-fill',
			'column-gap',
			'column-rule',
			'column-span',
			'column-width',

			'color',
			'filter',
			'font',
			'font-family',
			'font-size',
			'font-style',
			'font-variant',
			'font-weight',
			'letter-spacing',
			'line-height',
			'text-align',
			'text-decoration',
			'text-indent',
			'text-transform',
			'white-space',

			'height',
			'min-height',
			'max-height',

			'width',
			'min-width',
			'max-width',

			'margin',
			'margin-right',
			'margin-bottom',
			'margin-left',
			'margin-top',
			'margin-block-start',
			'margin-block-end',
			'margin-inline-start',
			'margin-inline-end',

			'padding',
			'padding-right',
			'padding-bottom',
			'padding-left',
			'padding-top',
			'padding-block-start',
			'padding-block-end',
			'padding-inline-start',
			'padding-inline-end',

			'flex',
			'flex-basis',
			'flex-direction',
			'flex-flow',
			'flex-grow',
			'flex-shrink',
			'flex-wrap',

			'gap',
			'column-gap',
			'row-gap',

			'grid-template-columns',
			'grid-auto-columns',
			'grid-column-start',
			'grid-column-end',
			'grid-column',
			'grid-column-gap',
			'grid-template-rows',
			'grid-auto-rows',
			'grid-row-start',
			'grid-row-end',
			'grid-row',
			'grid-row-gap',
			'grid-gap',

			'justify-content',
			'justify-items',
			'justify-self',
			'align-content',
			'align-items',
			'align-self',

			'clear',
			'cursor',
			'direction',
			'float',
			'list-style-type',
			'object-fit',
			'object-position',
			'opacity',
			'overflow',
			'vertical-align',
			'writing-mode',

			'position',
			'top',
			'right',
			'bottom',
			'left',
			'z-index',
			'box-shadow',
			'aspect-ratio',
			'container-type',

			// Custom CSS properties.
			'--*',
		)
	);

	/*
	 * CSS attributes that accept URL data types.
	 *
	 * This is in accordance to the CSS spec and unrelated to
	 * the sub-set of supported attributes above.
	 *
	 * See: https://developer.mozilla.org/en-US/docs/Web/CSS/url
	 */
	$css_url_data_types = array(
		'background',
		'background-image',

		'cursor',
		'filter',

		'list-style',
		'list-style-image',
	);

	/*
	 * CSS attributes that accept gradient data types.
	 *
	 */
	$css_gradient_data_types = array(
		'background',
		'background-image',
	);

	// When the (filtered) allowlist is empty, the cleaned-up input is returned without per-declaration checks.
	if ( empty( $allowed_attr ) ) {
		return $css;
	}

	// Rebuild the output from scratch, appending only the declarations that pass.
	$css = '';
	foreach ( $css_array as $css_item ) {
		if ( '' === $css_item ) {
			continue;
		}

		$css_item        = trim( $css_item );
		$css_test_string = $css_item;
		$found           = false;
		$url_attr        = false;
		$gradient_attr   = false;
		$is_custom_var   = false;

		if ( ! str_contains( $css_item, ':' ) ) {
			// No colon means no property name to look up; the item is still subject to the unsafe-character test below.
			$found = true;
		} else {
			$parts        = explode( ':', $css_item, 2 );
			$css_selector = trim( $parts[0] );

			// Allow assigning values to CSS variables.
			if ( in_array( '--*', $allowed_attr, true ) && preg_match( '/^--[a-zA-Z0-9-_]+$/', $css_selector ) ) {
				// Append the concrete variable name so the allowlist lookup below matches it.
				$allowed_attr[] = $css_selector;
				$is_custom_var  = true;
			}

			if ( in_array( $css_selector, $allowed_attr, true ) ) {
				$found         = true;
				$url_attr      = in_array( $css_selector, $css_url_data_types, true );
				$gradient_attr = in_array( $css_selector, $css_gradient_data_types, true );
			}

			// For custom properties, the value decides whether URL or gradient handling applies.
			if ( $is_custom_var ) {
				$css_value     = trim( $parts[1] );
				$url_attr      = str_starts_with( $css_value, 'url(' );
				$gradient_attr = str_contains( $css_value, '-gradient(' );
			}
		}

		if ( $found && $url_attr ) {
			// Simplified: matches the sequence `url(*)`.
			preg_match_all( '/url\([^)]+\)/', $parts[1], $url_matches );

			foreach ( $url_matches[0] as $url_match ) {
				// Clean up the URL from each of the matches above.
				preg_match( '/^url\(\s*([\'\"]?)(.*)(\g1)\s*\)$/', $url_match, $url_pieces );

				if ( empty( $url_pieces[2] ) ) {
					$found = false;
					break;
				}

				$url = trim( $url_pieces[2] );

				// The URL is rejected if the protocol check would alter it in any way.
				if ( empty( $url ) || wp_kses_bad_protocol( $url, $allowed_protocols ) !== $url ) {
					$found = false;
					break;
				} else {
					// Remove the whole `url(*)` bit that was matched above from the CSS.
					$css_test_string = str_replace( $url_match, '', $css_test_string );
				}
			}
		}

		if ( $found && $gradient_attr ) {
			$css_value = trim( $parts[1] );
			if ( preg_match( '/^(repeating-)?(linear|radial|conic)-gradient\(([^()]|rgb[a]?\([^()]*\))*\)$/', $css_value ) ) {
				// Remove the whole `gradient` bit that was matched above from the CSS.
				$css_test_string = str_replace( $css_value, '', $css_test_string );
			}
		}

		if ( $found ) {
			/*
			 * Allow CSS functions like var(), calc(), etc. by removing them from the test string.
			 * Nested functions and parentheses are also removed, so long as the parentheses are balanced.
			 */
			$css_test_string = preg_replace(
				'/\b(?:var|calc|min|max|minmax|clamp|repeat)(\((?:[^()]|(?1))*\))/',
				'',
				$css_test_string
			);

			/*
			 * Disallow CSS containing \ ( & } = or comments, except for within url(), var(), calc(), etc.
			 * which were removed from the test string above.
			 */
			$allow_css = ! preg_match( '%[\\\(&=}]|/\*%', $css_test_string );

			/**
			 * Filters the check for unsafe CSS in `safecss_filter_attr`.
			 *
			 * Enables developers to determine whether a section of CSS should be allowed or discarded.
			 * By default, the value will be false if the part contains \ ( & } = or comments.
			 * Return true to allow the CSS part to be included in the output.
			 *
			 * @since 5.5.0
			 *
			 * @param bool   $allow_css       Whether the CSS in the test string is considered safe.
			 * @param string $css_test_string The CSS string to test.
			 */
			$allow_css = apply_filters( 'safecss_filter_attr_allow_css', $allow_css, $css_test_string );

			// Only add the CSS part if it passes the regex check.
			if ( $allow_css ) {
				if ( '' !== $css ) {
					$css .= ';';
				}

				// The original (trimmed) declaration is emitted, not the reduced test string.
				$css .= $css_item;
			}
		}
	}

	return $css;
}

/**
 * Helper function to add global attributes to a tag in the allowed HTML list.
 *
 * A value of `true` is treated as an empty attribute list. Array values are
 * merged with the global attributes, which overwrite same-named keys; any
 * other value is returned untouched.
 *
 * @since 3.5.0
 * @since 5.0.0 Added support for `data-*` wildcard attributes.
 * @since 6.0.0 Added `dir`, `lang`, and `xml:lang` to global attributes.
 * @since 6.3.0 Added `aria-controls`, `aria-current`, and `aria-expanded` attributes.
 * @since 6.4.0 Added `aria-live` and `hidden` attributes.
 *
 * @access private
 * @ignore
 *
 * @param array $value An array of attributes.
 * @return array The array of attributes with global attributes added.
 */
function _wp_add_global_attributes( $value ) {
	$global_attributes = array(
		'aria-controls'    => true,
		'aria-current'     => true,
		'aria-describedby' => true,
		'aria-details'     => true,
		'aria-expanded'    => true,
		'aria-hidden'      => true,
		'aria-label'       => true,
		'aria-labelledby'  => true,
		'aria-live'        => true,
		'class'            => true,
		'data-*'           => true,
		'dir'              => true,
		'hidden'           => true,
		'id'               => true,
		'lang'             => true,
		'style'            => true,
		'title'            => true,
		'role'             => true,
		'xml:lang'         => true,
	);

	// A bare `true` becomes an empty list so the global attributes can be merged in.
	if ( true === $value ) {
		$value = array();
	}

	if ( is_array( $value ) ) {
		return array_merge( $value, $global_attributes );
	}

	return $value;
}

/**
 * Helper function to check if this is a safe PDF URL.
 *
 * @since 5.9.0
 * @access private
 * @ignore
 *
 * @param string $url The URL to check.
2932 * @return bool True if the URL is safe, false otherwise. 2933 */ 2934 function _wp_kses_allow_pdf_objects( $url ) { 2935 // We're not interested in URLs that contain query strings or fragments. 2936 if ( str_contains( $url, '?' ) || str_contains( $url, '#' ) ) { 2937 return false; 2938 } 2939 2940 // If it doesn't have a PDF extension, it's not safe. 2941 if ( ! str_ends_with( $url, '.pdf' ) ) { 2942 return false; 2943 } 2944 2945 // If the URL host matches the current site's media URL, it's safe. 2946 $upload_info = wp_upload_dir( null, false ); 2947 $parsed_url = wp_parse_url( $upload_info['url'] ); 2948 $upload_host = $parsed_url['host'] ?? ''; 2949 $upload_port = isset( $parsed_url['port'] ) ? ':' . $parsed_url['port'] : ''; 2950 2951 if ( str_starts_with( $url, "http://$upload_host$upload_port/" ) 2952 || str_starts_with( $url, "https://$upload_host$upload_port/" ) 2953 ) { 2954 return true; 2955 } 2956 2957 return false; 2958 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated : Thu Jan 29 08:20:07 2026 | Cross-referenced by PHPXref |