[ Index ]

PHP Cross Reference of WordPress Trunk (Updated Daily)

Search

title

Body

[close]

/wp-includes/ -> class-wp-email-address.php (source)

   1  <?php
   2  /**
   3   * Class 'WP_Email_Address'.
   4   *
   5   * @package WordPress
   6   * @since 7.1.0
   7   */
   8  
   9  /**
  10   * WP_Email_Address Class.
  11   *
  12   * Represents a validated email address. The address may or may not be deliverable.
  13   *
  14   * Use the static factory method {@see WP_Email_Address::from_string()} to create instances
  15   * of this class rather than the constructor. This method only returns an instance for
  16   * validated email addresses, and `null` if the provided email address fails to validate.
  17   *
  18   * Example:
  19   *
  20   *     $email = WP_Email_Address::from_string( 'wordpress@wordpress.org' );
  21   *     'wordpress'     === $email->get_local_part();
  22   *     'wordpress.org' === $email->get_domain();
  23   *
  24   * @see self::from_string()        to parse and validate a provided email address.
  25   * @see self::get_localpart()      for the local part or mailbox of the address.
  26   * @see self::get_ascii_domain()   for an encoded version of the domain best suited for
  27   *                                 printing in contexts where other software reads it and
  28   *                                 decodes it, such as in an `<a href>` attribute.
  29   * @see self::get_unicode_domain() for a decoded version of the domain best suited for
  30   *                                 printing in contexts where humans read it, where any
  31   *                                 Unicode characters print as they are, not as punycode.
  32   *
  33   * @since 7.1.0
  34   */
  35  final class WP_Email_Address {
  36      /**
  37       * Regex for the local part when Unicode is not enabled.
  38       *
  39       * Matches the character set from the WHATWG email specification:
  40       * https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
  41       *
  42       * @since 7.1.0
  43       * @var string
  44       */
  45      const LOCAL_PART_ASCII_REGEX = '/^[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]+$/';
  46  
  47      /**
  48       * Regex for the local part when Unicode is enabled.
  49       *
  50       * Extends the WHATWG character set to allow Unicode letters and numbers,
  51       * and applies the same grapheme-cluster structure used for domain labels:
  52       * each cluster must open with a non-combining character.
  53       *
  54       * @since 7.1.0
  55       * @var string
  56       */
  57      const LOCAL_PART_UNICODE_REGEX = '/^([\p{L}\p{N}.!#$%&\'*+\/=?^_`{|}~-]\p{M}*)+$/u';
  58  
  59      /**
  60       * Pattern for a single ASCII domain label (no dot).
  61       *
  62       * Matches a label from the WHATWG email specification: starts and ends with
  63       * a letter or digit; internal characters may include hyphens.
  64       *
  65       * @since 7.1.0
  66       * @var string
  67       */
  68      const DOMAIN_LABEL_ASCII = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';
  69  
  70      /**
  71       * Pattern for a single Unicode domain label (no dot).
  72       *
  73       * Extends the ASCII label pattern to allow Unicode letters and numbers,
  74       * with grapheme-cluster structure: each cluster must open with a letter or
  75       * digit (not a combining mark), followed by zero or more combining marks.
  76       *
  77       * @since 7.1.0
  78       * @var string
  79       */
  80      const DOMAIN_LABEL_UNICODE = '[\p{L}\p{N}]\p{M}*(?:(?:[\p{L}\p{N}-]\p{M}*)*[\p{L}\p{N}]\p{M}*)?';
  81  
  82      /**
  83       * Regex for the domain when Unicode is not enabled.
  84       *
  85       * Assembled from {@see self::DOMAIN_LABEL_ASCII}: one label, then zero or
  86       * more dot-separated labels.
  87       *
  88       * @since 7.1.0
  89       * @var string
  90       */
  91      const DOMAIN_ASCII_REGEX = '/^' . self::DOMAIN_LABEL_ASCII . '(?:\.' . self::DOMAIN_LABEL_ASCII . ')*$/';
  92  
  93      /**
  94       * Regex for the domain when Unicode is enabled.
  95       *
  96       * Assembled from {@see self::DOMAIN_LABEL_UNICODE}: one label, then zero or
  97       * more dot-prefixed labels.
  98       *
  99       * @since 7.1.0
 100       * @var string
 101       */
 102      const DOMAIN_UNICODE_REGEX = '/^' . self::DOMAIN_LABEL_UNICODE . '(?:\.' . self::DOMAIN_LABEL_UNICODE . ')*$/u';
 103  
 104      /**
 105       * The local part of the email address (the portion before the '@').
 106       *
 107       * @since 7.1.0
 108       * @var string
 109       */
 110      private $localpart;
 111  
 112      /**
 113       * The email domain using punycode transcription instead of Unicode characters.
 114       *
 115       * Example:
 116       *
 117       *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
 118       *     'xn--bcher-kva.tld' === $email->get_ascii_domain();
 119       *
 120       * @see self::$decoded_domain
 121       *
 122       * @since 7.1.0
 123       * @var string
 124       */
 125      private $encoded_domain;
 126  
 127      /**
 128       * The email domain, which may contain Unicode characters.
 129       *
 130       * Example:
 131       *
 132       *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
 133       *     'bücher.tld' === $email->get_unicode_domain();
 134       *
 135       * @see self::$encoded_domain
 136       *
 137       * @since 7.1.0
 138       * @var string
 139       */
 140      private $decoded_domain;
 141  
 142      /**
 143       * Private constructor. Use {@see WP_Email_Address::from_string()} to create instances.
 144       *
 145       * @since 7.1.0
 146       * @private
 147       *
 148       * @param string $localpart           The local part of the email address.
 149       * @param string $ascii_domain        The domain part of the email address, which may include punycode transcription.
 150       * @param string|null $unicode_domain The domain part of the email address, which may contain Unicode characters, or
 151       *                                    null if no Unicode translation exists.
 152       */
 153  	private function __construct( string $localpart, string $ascii_domain, ?string $unicode_domain ) {
 154          $this->localpart      = $localpart;
 155          $this->encoded_domain = $ascii_domain;
 156          $this->decoded_domain = $unicode_domain;
 157      }
 158  
 159      /**
 160       * Creates a WP_Email_Address from a string.
 161       *
 162       * This method is intended to accept all strings that are considered valid email
 163       * addresses by the WHATWG HTML specification for the `email` input type
 164       * {@link https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)}
 165       * and some additional addresses, while rejecting strings that are more likely to
 166       * be typos, mispastes, or attacks. This class may reject a few address that are
 167       * valid according to RFC 5322, but it always accepts an address if it's valid
 168       * according to WHATWG. Put differently: If users can type an address into the
 169       * major browsers of 2026, this class accepts them, if they can't (in 2026),
 170       * this class may or may not.
 171       *
 172       * Example:
 173       *
 174       *     // Typical all-US-ASCII email address.
 175       *     $email = WP_Email_Address::from_string( 'webmaster@example.com' );
 176       *     'webmaster'   === $email->get_localpart();
 177       *     'example.com' === $email->get_ascii_domain();
 178       *     'example.com' === $email->get_unicode_domain();
 179       *
 180       *     // Punycode domains are always decoded.
 181       *     $email = WP_Email_Address::from_string( 'books@xn--bcher-kva.de' );
 182       *     'books'            === $email->get_localpart();
 183       *     'xn--bcher-kva.de' === $email->get_ascii_domain();
 184       *     'Bücher.de'        === $email->get_unicode_domain();
 185       *
 186       *     // Unicode localparts are accepted if Unicode addresses are requested (the default).
 187       *     $email = WP_Email_Address::from_string( 'bücher@example.com' );
 188       *     'bücher' === $email->get_localpart();
 189       *
 190       *     // Addresses with non-ASCII are rejected if ASCII-only addresses are requested.
 191       *     null === WP_Email_Address::from_string( 'books@xn--bcher-kva.de', 'ascii' );
 192       *     null === WP_Email_Address::from_string( 'bücher@xn--bcher-kva.de', 'ascii' );
 193       *     null === WP_Email_Address::from_string( 'bücher@Bücher.de', 'ascii' );
 194       *
 195       *     // Some valid addresses (according to RFC 5322) are rejected.
 196       *     null === WP_Email_Address::from_string( '"<iframe src=...>"@example.com' );
 197       *
 198       * Note! If an address contains punycode encodings but the required {@see idn_to_utf8()}
 199       * function is missing (from the `intl` extension), this will reject that email address.
 200       *
 201       * @since 7.1.0
 202       *
 203       * @param string            $input         The email address string to parse.
 204       * @param 'ascii'|'unicode' $character_set Allow only ASCII addresses or all valid Unicode addresses.
 205       * @return WP_Email_Address|null A WP_Email_Address instance, or null if the input fails to validate.
 206       */
 207  	public static function from_string( string $input, string $character_set = 'unicode' ): ?WP_Email_Address {
 208          // There must be exactly one '@' sign.
 209          $at_pos = strpos( $input, '@' );
 210          if ( false === $at_pos || strrpos( $input, '@' ) !== $at_pos ) {
 211              return null;
 212          }
 213  
 214          $allow_unicode  = 'unicode' === $character_set;
 215          $localpart      = substr( $input, 0, $at_pos );
 216          $ascii_domain   = substr( $input, $at_pos + 1 );
 217          $domain_labels  = explode( '.', $ascii_domain );
 218          $local_pattern  = $allow_unicode ? self::LOCAL_PART_UNICODE_REGEX : self::LOCAL_PART_ASCII_REGEX;
 219          $domain_pattern = $allow_unicode ? self::DOMAIN_UNICODE_REGEX : self::DOMAIN_ASCII_REGEX;
 220  
 221          foreach ( $domain_labels as $label ) {
 222              // DNS limits each label to 63 octets.
 223              if ( strlen( $label ) > 63 ) {
 224                  return null;
 225              }
 226          }
 227  
 228          /*
 229           * Without support for decoding punycode it’s not possible to validate
 230           * the email address, so abort if any domain labels require decoding.
 231           *
 232           * The pattern detects `xn--` prefixes and invalid ACE prefixes.
 233           */
 234          $needs_decoding = 1 === preg_match( '/(?:^|\.)..--/', $ascii_domain );
 235          if ( $needs_decoding && ! function_exists( 'idn_to_utf8' ) ) {
 236              return null;
 237          }
 238  
 239          /*
 240           * Validate each domain label, decode any punycode to UTF-8, and
 241           * reassemble the decoded labels into the local $domain variable.
 242           */
 243          if ( $needs_decoding ) {
 244              $decoded_labels = array();
 245              foreach ( $domain_labels as $label ) {
 246                  // Decode punycode labels to their Unicode form for further validation.
 247                  if ( str_starts_with( $label, 'xn--' ) ) {
 248                      $label = idn_to_utf8( $label, IDNA_DEFAULT, INTL_IDNA_VARIANT_UTS46 );
 249                      if ( false === $label ) {
 250                          return null;
 251                      }
 252                  } elseif ( 1 === preg_match( '/^..--/', $label ) ) {
 253                      // Reject labels with a reserved ACE-like prefix (two chars followed by '--').
 254                      return null;
 255                  }
 256                  $decoded_labels[] = $label;
 257              }
 258              $decoded_domain = implode( '.', $decoded_labels );
 259          } else {
 260              $decoded_domain = $ascii_domain;
 261          }
 262  
 263          // Without Unicode support, reject any non-ASCII byte in either part.
 264          if (
 265              ! $allow_unicode &&
 266              (
 267                  1 === preg_match( '/[\x80-\xff]/', $input ) ||
 268                  1 === preg_match( '/[\x80-\xff]/', $decoded_domain )
 269              )
 270          ) {
 271              return null;
 272          }
 273  
 274          // All parts must be valid UTF-8, regardless of whether Unicode is requested. (A valid ASCII string is also valid UTF-8.)
 275          if (
 276              ! wp_is_valid_utf8( $localpart ) ||
 277              ! wp_is_valid_utf8( $ascii_domain ) ||
 278              ! wp_is_valid_utf8( $decoded_domain )
 279          ) {
 280              return null;
 281          }
 282  
 283          // Validate the local part against the allowed character set.
 284          if ( 1 !== preg_match( $local_pattern, $localpart ) ) {
 285              /** This filter is documented in wp-includes/formatting.php */
 286              if ( ! apply_filters( 'is_email', false, $input, 'local_invalid_chars' ) ) {
 287                  return null;
 288              }
 289          }
 290  
 291          // The domain must contain at least one dot.
 292          if ( ! str_contains( $ascii_domain, '.' ) ) {
 293              /** This filter is documented in wp-includes/formatting.php */
 294              if ( ! apply_filters( 'is_email', false, $input, 'domain_no_periods' ) ) {
 295                  return null;
 296              }
 297          }
 298  
 299          // Validate the domain against the allowed structure.
 300          if ( 1 !== preg_match( $domain_pattern, $decoded_domain ) ) {
 301              return null;
 302          }
 303  
 304          return new self( $localpart, $ascii_domain, $decoded_domain );
 305      }
 306  
 307      /**
 308       * Returns the local part of the email address (the portion before the '@').
 309       *
 310       * Example:
 311       *
 312       *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
 313       *     'checkout' === $email->get_localpart();
 314       *
 315       * @since 7.1.0
 316       *
 317       * @return string The local part of the email address.
 318       */
 319  	public function get_localpart(): string {
 320          return $this->localpart;
 321      }
 322  
 323      /**
 324       * Returns the ASCII form of the domain, suitable for contexts in which
 325       * other software will be reading and decoding it. May contain punycode.
 326       *
 327       * Example:
 328       *
 329       *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
 330       *     'xn--bcher-kva.tld' === $email->get_ascii_domain();
 331       *
 332       * Note! Do not mix a Unicode local part with an ASCII domain part.
 333       *       Prefer to keep the entire address in one form.
 334       *
 335       * @see self::get_unicode_domain()
 336       *
 337       * @return string Form of domain for machines, potentially containing
 338       *                punycode translation of Unicode characters.
 339       */
 340  	public function get_ascii_domain(): string {
 341          return $this->encoded_domain;
 342      }
 343  
 344      /**
 345       * Returns the Unicode form of the domain, suitable for contexts in which
 346       * humans will be reading it. May contain Unicode characters.
 347       *
 348       * Example:
 349       *
 350       *     $email = WP_Email_Address::from_string( 'checkout@bücher.tld' );
 351       *     'bücher.tld' === $email->get_unicode_domain();
 352       *
 353       * Note! Do not mix a Unicode local part with an ASCII domain part.
 354       *       Prefer to keep the entire address in one form.
 355       *
 356       * @see self::get_ascii_domain()
 357       *
 358       * @since 7.1.0
 359       *
 360       * @return string The domain part of the email address.
 361       */
 362  	public function get_unicode_domain(): string {
 363          return $this->decoded_domain;
 364      }
 365  
 366      /**
 367       * Returns the complete email address for contexts in which software
 368       * will read it; may contain punycode transliterated Unicode characters.
 369       *
 370       * Use this method in places such as an `<a href>` attribute where other
 371       * software will decode the address.
 372       *
 373       * The returned value can always be passed to {@see WP_Email_Address::from_string()}
 374       * and will produce an equivalent WP_Email_Address instance.
 375       *
 376       * @see self::get_unicode_address()
 377       *
 378       * @since 7.1.0
 379       *
 380       * @return string
 381       */
 382  	public function get_ascii_address(): string {
 383          return $this->localpart . '@' . $this->encoded_domain;
 384      }
 385  
 386      /**
 387       * Returns the complete email address for contexts in which humans
 388       * will read it; may contain Unicode characters in the domain.
 389       *
 390       * Use this method in places such as HTML text nodes which visually
 391       * show the email address and domain.
 392       *
 393       * The returned value can always be passed to {@see WP_Email_Address::from_string()}
 394       * and will produce an equivalent WP_Email_Address instance.
 395       *
 396       * @see self::get_ascii_address()
 397       *
 398       * @since 7.1.0
 399       *
 400       * @return string The complete email address.
 401       */
 402  	public function get_unicode_address(): string {
 403          return $this->localpart . '@' . $this->decoded_domain;
 404      }
 405  }


Generated : Sun Jun 14 08:20:09 2026 Cross-referenced by PHPXref