3 * To validate an email address according to RFCs 5321, 5322 and others
5 * Copyright © 2008-2011, Dominic Sayers
6 * Test schema documentation Copyright © 2011, Daniel Marschall
9 * Redistribution and use in source and binary forms, with or without modification,
10 * are permitted provided that the following conditions are met:
12 * - Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * - Neither the name of Dominic Sayers nor the names of its contributors may be
18 * used to endorse or promote products derived from this software without
19 * specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 * @author Dominic Sayers <dominic@sayers.cc>
34 * @copyright 2008-2011 Dominic Sayers
35 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
36 * @link http://www.dominicsayers.com/isemail
37 * @version 3.04.1 - Changed my link to http://isemail.info throughout
40 // The quality of this code has been improved greatly by using PHPLint
41 // Copyright (c) 2010 Umberto Salsi
42 // This is free software; see the license for copying conditions.
43 // More info: http://www.icosaedro.it/phplint/
45 require_module 'standard';
46 require_module 'pcre';
// is_email() constants. The guard below defines them only if ISEMAIL_VALID is
// not defined yet (presumably so that including this file twice is harmless).
//
// Generated section: category ceilings first (ISEMAIL_VALID_CATEGORY ..
// ISEMAIL_ERR), then the individual diagnosis codes. Each diagnosis code is
// numerically below the ceiling of the category it is named after, e.g.
// ISEMAIL_DNSWARN_NO_MX_RECORD (5) and ISEMAIL_DNSWARN_NO_RECORD (6) are
// below ISEMAIL_DNSWARN (7). is_email() relies on that ordering when it
// compares max($return_status) against a category value.
49 if (!defined('ISEMAIL_VALID')) {
50 /*:diagnostic constants start:*/
51 // This part of the code is generated using data from test/meta.xml. Beware of making manual alterations
53 define('ISEMAIL_VALID_CATEGORY', 1);
54 define('ISEMAIL_DNSWARN', 7);
55 define('ISEMAIL_RFC5321', 15);
56 define('ISEMAIL_CFWS', 31);
57 define('ISEMAIL_DEPREC', 63);
58 define('ISEMAIL_RFC5322', 127);
59 define('ISEMAIL_ERR', 255);
63 define('ISEMAIL_VALID', 0);
64 // Address is valid but a DNS check was not successful
65 define('ISEMAIL_DNSWARN_NO_MX_RECORD', 5);
66 define('ISEMAIL_DNSWARN_NO_RECORD', 6);
67 // Address is valid for SMTP but has unusual elements
68 define('ISEMAIL_RFC5321_TLD', 9);
69 define('ISEMAIL_RFC5321_TLDNUMERIC', 10);
70 define('ISEMAIL_RFC5321_QUOTEDSTRING', 11);
71 define('ISEMAIL_RFC5321_ADDRESSLITERAL', 12);
72 define('ISEMAIL_RFC5321_IPV6DEPRECATED', 13);
73 // Address is valid within the message but cannot be used unmodified for the envelope
74 define('ISEMAIL_CFWS_COMMENT', 17);
75 define('ISEMAIL_CFWS_FWS', 18);
76 // Address contains deprecated elements but may still be valid in restricted contexts
77 define('ISEMAIL_DEPREC_LOCALPART', 33);
78 define('ISEMAIL_DEPREC_FWS', 34);
79 define('ISEMAIL_DEPREC_QTEXT', 35);
80 define('ISEMAIL_DEPREC_QP', 36);
81 define('ISEMAIL_DEPREC_COMMENT', 37);
82 define('ISEMAIL_DEPREC_CTEXT', 38);
83 define('ISEMAIL_DEPREC_CFWS_NEAR_AT', 49);
84 // The address is only valid according to the broad definition of RFC 5322. It is otherwise invalid.
85 define('ISEMAIL_RFC5322_DOMAIN', 65);
86 define('ISEMAIL_RFC5322_TOOLONG', 66);
87 define('ISEMAIL_RFC5322_LOCAL_TOOLONG', 67);
88 define('ISEMAIL_RFC5322_DOMAIN_TOOLONG', 68);
89 define('ISEMAIL_RFC5322_LABEL_TOOLONG', 69);
90 define('ISEMAIL_RFC5322_DOMAINLITERAL', 70);
91 define('ISEMAIL_RFC5322_DOMLIT_OBSDTEXT', 71);
92 define('ISEMAIL_RFC5322_IPV6_GRPCOUNT', 72);
93 define('ISEMAIL_RFC5322_IPV6_2X2XCOLON', 73);
94 define('ISEMAIL_RFC5322_IPV6_BADCHAR', 74);
95 define('ISEMAIL_RFC5322_IPV6_MAXGRPS', 75);
96 define('ISEMAIL_RFC5322_IPV6_COLONSTRT', 76);
97 define('ISEMAIL_RFC5322_IPV6_COLONEND', 77);
98 // Address is invalid for any purpose
99 define('ISEMAIL_ERR_EXPECTING_DTEXT', 129);
100 define('ISEMAIL_ERR_NOLOCALPART', 130);
101 define('ISEMAIL_ERR_NODOMAIN', 131);
102 define('ISEMAIL_ERR_CONSECUTIVEDOTS', 132);
103 define('ISEMAIL_ERR_ATEXT_AFTER_CFWS', 133);
104 define('ISEMAIL_ERR_ATEXT_AFTER_QS', 134);
105 define('ISEMAIL_ERR_ATEXT_AFTER_DOMLIT', 135);
106 define('ISEMAIL_ERR_EXPECTING_QPAIR', 136);
107 define('ISEMAIL_ERR_EXPECTING_ATEXT', 137);
108 define('ISEMAIL_ERR_EXPECTING_QTEXT', 138);
109 define('ISEMAIL_ERR_EXPECTING_CTEXT', 139);
110 define('ISEMAIL_ERR_BACKSLASHEND', 140);
111 define('ISEMAIL_ERR_DOT_START', 141);
112 define('ISEMAIL_ERR_DOT_END', 142);
113 define('ISEMAIL_ERR_DOMAINHYPHENSTART', 143);
114 define('ISEMAIL_ERR_DOMAINHYPHENEND', 144);
115 define('ISEMAIL_ERR_UNCLOSEDQUOTEDSTR', 145);
116 define('ISEMAIL_ERR_UNCLOSEDCOMMENT', 146);
117 define('ISEMAIL_ERR_UNCLOSEDDOMLIT', 147);
118 define('ISEMAIL_ERR_FWS_CRLF_X2', 148);
119 define('ISEMAIL_ERR_FWS_CRLF_END', 149);
120 define('ISEMAIL_ERR_CR_NO_LF', 150);
121 // End of generated code
122 /*:diagnostic constants end:*/
// Value is_email() assigns to $threshold in one of its backward-compatibility
// $errorlevel cases. 16 lies just above ISEMAIL_RFC5321 (15) and below the
// first CFWS diagnosis code (17).
125 define('ISEMAIL_THRESHOLD', 16);
// Parser states: the values held by $context (and pushed onto $context_stack)
// while is_email() walks the address character by character. The COMPONENT_*
// values are also used as keys of $parsedata.
128 define('ISEMAIL_COMPONENT_LOCALPART', 0);
129 define('ISEMAIL_COMPONENT_DOMAIN', 1);
130 define('ISEMAIL_COMPONENT_LITERAL', 2);
131 define('ISEMAIL_CONTEXT_COMMENT', 3);
132 define('ISEMAIL_CONTEXT_FWS', 4);
133 define('ISEMAIL_CONTEXT_QUOTEDSTRING', 5);
134 define('ISEMAIL_CONTEXT_QUOTEDPAIR', 6);
136 // Miscellaneous string constants
// Characters and short strings that the parser compares the current $token
// (or parts of an address literal) against.
137 define('ISEMAIL_STRING_AT', '@');
138 define('ISEMAIL_STRING_BACKSLASH', '\\');
139 define('ISEMAIL_STRING_DOT', '.');
140 define('ISEMAIL_STRING_DQUOTE', '"');
141 define('ISEMAIL_STRING_OPENPARENTHESIS', '(');
142 define('ISEMAIL_STRING_CLOSEPARENTHESIS', ')');
143 define('ISEMAIL_STRING_OPENSQBRACKET', '[');
144 define('ISEMAIL_STRING_CLOSESQBRACKET', ']');
145 define('ISEMAIL_STRING_HYPHEN', '-');
146 define('ISEMAIL_STRING_COLON', ':');
147 define('ISEMAIL_STRING_DOUBLECOLON', '::');
148 define('ISEMAIL_STRING_SP', ' ');
149 define('ISEMAIL_STRING_HTAB', "\t");
150 define('ISEMAIL_STRING_CR', "\r");
151 define('ISEMAIL_STRING_LF', "\n");
152 define('ISEMAIL_STRING_IPV6TAG', 'IPv6:');
153 // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3)
// is_email() looks a token up in this string with strpos(); a hit means the
// token is a "special" and is reported as ISEMAIL_ERR_EXPECTING_ATEXT.
154 define('ISEMAIL_STRING_SPECIALS', '()<>[]:;@\\,."');
158 * Check that an email address conforms to RFCs 5321, 5322 and others
160 * As of Version 3.0, we are now distinguishing clearly between a Mailbox
161 * as defined by RFC 5321 and an addr-spec as defined by RFC 5322. Depending
162 * on the context, either can be regarded as a valid email address. The
163 * RFC 5321 Mailbox specification is more restrictive (comments, white space
164 * and obsolete forms are not allowed)
166 * @param string $email The email address to check
167 * @param boolean $checkDNS If true then a DNS check for MX records will be made
168 * @param mixed $errorlevel Determines the boundary between valid and invalid addresses.
169 * Status codes above this number will be returned as-is,
170 * status codes below will be returned as ISEMAIL_VALID. Thus the
171 * calling program can simply look for ISEMAIL_VALID if it is
172 * only interested in whether an address is valid or not. The
173 * errorlevel will determine how "picky" is_email() is about the address.
176 * If omitted or passed as false then is_email() will return
177 * true or false rather than an integer error or warning.
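179 * NB Note the difference between $errorlevel = false and $errorlevel = 0:
179 * false selects the boolean (true/false) result, whereas 0 is treated as an
179 * integer threshold and an integer status is returned.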
181 * @param array $parsedata If passed, returns the parsed address components
183 /*.mixed.*/ function is_email($email, $checkDNS = false, $errorlevel = false, &$parsedata = array()) {
184 // Check that $email is a valid address. Read the following RFCs to understand the constraints:
185 // (http://tools.ietf.org/html/rfc5321)
186 // (http://tools.ietf.org/html/rfc5322)
187 // (http://tools.ietf.org/html/rfc4291#section-2.2)
188 // (http://tools.ietf.org/html/rfc1123#section-2.1)
189 // (http://tools.ietf.org/html/rfc3696) (guidance only)
190 // version 2.0: Enhance $diagnose parameter to $errorlevel
191 // version 3.0: Introduced status categories
192 // revision 3.1: BUG: $parsedata was passed by value instead of by reference
194 if (is_bool($errorlevel)) {
195 $threshold = ISEMAIL_VALID
;
196 $diagnose = (bool) $errorlevel;
200 switch ((int) $errorlevel) {
202 $threshold = ISEMAIL_THRESHOLD
;
203 break; // For backward compatibility
205 $threshold = ISEMAIL_VALID
;
206 break; // For backward compatibility
208 $threshold = (int) $errorlevel;
212 $return_status = array(ISEMAIL_VALID
);
214 // Parse the address into components, character by character
215 $raw_length = strlen($email);
216 $context = ISEMAIL_COMPONENT_LOCALPART
; // Where we are
217 $context_stack = array($context); // Where we have been
218 $context_prior = ISEMAIL_COMPONENT_LOCALPART
; // Where we just came from
219 $token = ''; // The current character
220 $token_prior = ''; // The previous character
222 ISEMAIL_COMPONENT_LOCALPART
=> '',
223 ISEMAIL_COMPONENT_DOMAIN
=> ''
224 ); // For the components of the address
227 ISEMAIL_COMPONENT_LOCALPART
=> array(''),
228 ISEMAIL_COMPONENT_DOMAIN
=> array('')
229 ); // For the dot-atom elements of the address
232 $hyphen_flag = false; // Hyphen cannot occur at the end of a subdomain
233 $end_or_die = false; // CFWS can only appear at the end of the element
235 //-echo "<table style=\"clear:left;\">"; // debug
236 for ($i = 0; $i < $raw_length; $i++
) {
238 //-echo "<tr><td><strong>$context|",(($end_or_die) ? 'true' : 'false'),
239 //"|$token|" . max($return_status) . "</strong></td>"; // debug
242 //-------------------------------------------------------------
244 //-------------------------------------------------------------
245 case ISEMAIL_COMPONENT_LOCALPART
:
246 // http://tools.ietf.org/html/rfc5322#section-3.4.1
247 // local-part = dot-atom / quoted-string / obs-local-part
249 // dot-atom = [CFWS] dot-atom-text [CFWS]
251 // dot-atom-text = 1*atext *("." 1*atext)
253 // quoted-string = [CFWS]
254 // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
257 // obs-local-part = word *("." word)
259 // word = atom / quoted-string
261 // atom = [CFWS] 1*atext [CFWS]
264 case ISEMAIL_STRING_OPENPARENTHESIS
:
265 if ($element_len === 0) {
266 // Comments are OK at the beginning of an element
267 $return_status[] = ($element_count === 0) ?
268 ISEMAIL_CFWS_COMMENT
: ISEMAIL_DEPREC_COMMENT
;
270 $return_status[] = ISEMAIL_CFWS_COMMENT
;
272 // We can't start a comment in the middle of an element, so this better be the end
274 $context_stack[] = $context;
275 $context = ISEMAIL_CONTEXT_COMMENT
;
277 // Next dot-atom element
278 case ISEMAIL_STRING_DOT
:
279 if ($element_len === 0) {
280 // Another dot, already?
282 $return_status[] = ($element_count === 0)
283 ? ISEMAIL_ERR_DOT_START
: ISEMAIL_ERR_CONSECUTIVEDOTS
;
285 // The entire local-part can be a quoted string for RFC 5321
286 // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
288 $return_status[] = ISEMAIL_DEPREC_LOCALPART
;
291 // CFWS & quoted strings are OK again now we're at the beginning of an element
292 // (although they are obsolete forms)
296 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= $token;
297 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] = '';
300 case ISEMAIL_STRING_DQUOTE
:
301 if ($element_len === 0) {
302 // The entire local-part can be a quoted string for RFC 5321
303 // If it's just one atom that is quoted then it's an RFC 5322 obsolete form
304 $return_status[] = ($element_count === 0)
305 ? ISEMAIL_RFC5321_QUOTEDSTRING
: ISEMAIL_DEPREC_LOCALPART
;
307 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= $token;
308 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] .= $token;
310 $end_or_die = true; // Quoted string must be the entire element
311 $context_stack[] = $context;
312 $context = ISEMAIL_CONTEXT_QUOTEDSTRING
;
314 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT
; // Fatal error
318 // Folding White Space
319 case ISEMAIL_STRING_CR
:
320 case ISEMAIL_STRING_SP
:
321 case ISEMAIL_STRING_HTAB
:
322 if (($token === ISEMAIL_STRING_CR
)
323 && ((++
$i === $raw_length) ||
($email[$i] !== ISEMAIL_STRING_LF
))) {
324 $return_status[] = ISEMAIL_ERR_CR_NO_LF
;
328 if ($element_len === 0) {
329 $return_status[] = ($element_count === 0) ? ISEMAIL_CFWS_FWS
: ISEMAIL_DEPREC_FWS
;
331 // We can't start FWS in the middle of an element, so this better be the end
335 $context_stack[] = $context;
336 $context = ISEMAIL_CONTEXT_FWS
;
337 $token_prior = $token;
341 case ISEMAIL_STRING_AT
:
342 // At this point we should have a valid local-part
343 if (count($context_stack) !== 1) {
344 die('Unexpected item on context stack');
347 if ($parsedata[ISEMAIL_COMPONENT_LOCALPART
] === '') {
348 $return_status[] = ISEMAIL_ERR_NOLOCALPART
; // Fatal error
349 } elseif ($element_len === 0) {
350 $return_status[] = ISEMAIL_ERR_DOT_END
; // Fatal error
351 } elseif (strlen($parsedata[ISEMAIL_COMPONENT_LOCALPART
]) > 64) {
352 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1
353 // The maximum total length of a user name or other local-part is 64
355 $return_status[] = ISEMAIL_RFC5322_LOCAL_TOOLONG
;
356 } elseif (($context_prior === ISEMAIL_CONTEXT_COMMENT
) ||
($context_prior === ISEMAIL_CONTEXT_FWS
)) {
357 // http://tools.ietf.org/html/rfc5322#section-3.4.1
358 // Comments and folding white space
359 // SHOULD NOT be used around the "@" in the addr-spec.
361 // http://tools.ietf.org/html/rfc2119
362 // 4. SHOULD NOT This phrase, or the phrase "NOT RECOMMENDED" mean that
363 // there may exist valid reasons in particular circumstances when the
364 // particular behavior is acceptable or even useful, but the full
365 // implications should be understood and the case carefully weighed
366 // before implementing any behavior described with this label.
367 $return_status[] = ISEMAIL_DEPREC_CFWS_NEAR_AT
;
369 // Clear everything down for the domain parsing
370 $context = ISEMAIL_COMPONENT_DOMAIN
; // Where we are
371 $context_stack = array($context); // Where we have been
374 $end_or_die = false; // CFWS can only appear at the end of the element
379 // http://tools.ietf.org/html/rfc5322#section-3.2.3
380 // atext = ALPHA / DIGIT / ; Printable US-ASCII
381 // "!" / "#" / ; characters not including
382 // "$" / "%" / ; specials. Used for atoms.
392 // We have encountered atext where it is no longer valid
393 switch ($context_prior) {
394 case ISEMAIL_CONTEXT_COMMENT
:
395 case ISEMAIL_CONTEXT_FWS
:
396 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS
;
398 case ISEMAIL_CONTEXT_QUOTEDSTRING
:
399 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_QS
;
402 die("More atext found where none is allowed, but unrecognised prior context: $context_prior");
405 $context_prior = $context;
407 if (($ord < 33) ||
($ord > 126) ||
($ord === 10)
408 ||
(!is_bool(strpos(ISEMAIL_STRING_SPECIALS
, $token)))) {
409 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT
; // Fatal error
412 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= $token;
413 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] .= $token;
418 //-------------------------------------------------------------
420 //-------------------------------------------------------------
421 case ISEMAIL_COMPONENT_DOMAIN
:
422 // http://tools.ietf.org/html/rfc5322#section-3.4.1
423 // domain = dot-atom / domain-literal / obs-domain
425 // dot-atom = [CFWS] dot-atom-text [CFWS]
427 // dot-atom-text = 1*atext *("." 1*atext)
429 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
431 // dtext = %d33-90 / ; Printable US-ASCII
432 // %d94-126 / ; characters not including
433 // obs-dtext ; "[", "]", or "\"
435 // obs-domain = atom *("." atom)
437 // atom = [CFWS] 1*atext [CFWS]
440 // http://tools.ietf.org/html/rfc5321#section-4.1.2
441 // Mailbox = Local-part "@" ( Domain / address-literal )
443 // Domain = sub-domain *("." sub-domain)
445 // address-literal = "[" ( IPv4-address-literal /
446 // IPv6-address-literal /
447 // General-address-literal ) "]"
448 // ; See Section 4.1.3
450 // http://tools.ietf.org/html/rfc5322#section-3.4.1
451 // Note: A liberal syntax for the domain portion of addr-spec is
452 // given here. However, the domain portion contains addressing
453 // information specified by and used in other protocols (e.g.,
454 // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore
455 // incumbent upon implementations to conform to the syntax of
456 // addresses for the context in which they are used.
457 // is_email() author's note: it's not clear how to interpret this in
458 // the context of a general email address validator. The conclusion I
459 // have reached is this: "addressing information" must comply with
460 // RFC 5321 (and in turn RFC 1035), anything that is "semantically
461 // invisible" must comply only with RFC 5322.
464 case ISEMAIL_STRING_OPENPARENTHESIS
:
465 if ($element_len === 0) {
466 // Comments at the start of the domain are deprecated in the text
467 // Comments at the start of a subdomain are obs-domain
468 // (http://tools.ietf.org/html/rfc5322#section-3.4.1)
469 $return_status[] = ($element_count === 0) ?
470 ISEMAIL_DEPREC_CFWS_NEAR_AT
: ISEMAIL_DEPREC_COMMENT
;
472 $return_status[] = ISEMAIL_CFWS_COMMENT
;
473 // We can't start a comment in the middle of an element, so this better be the end
477 $context_stack[] = $context;
478 $context = ISEMAIL_CONTEXT_COMMENT
;
480 // Next dot-atom element
481 case ISEMAIL_STRING_DOT
:
482 if ($element_len === 0) {
483 // Another dot, already?
484 $return_status[] = ($element_count === 0) ?
485 ISEMAIL_ERR_DOT_START
: ISEMAIL_ERR_CONSECUTIVEDOTS
; // Fatal error
486 } elseif ($hyphen_flag) {
487 // Previous subdomain ended in a hyphen
488 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND
; // Fatal error
490 // Nowhere in RFC 5321 does it say explicitly that the
491 // domain part of a Mailbox must be a valid domain according
492 // to the DNS standards set out in RFC 1035, but this *is*
493 // implied in several places. For instance, wherever the idea
494 // of host routing is discussed the RFC says that the domain
495 // must be looked up in the DNS. This would be nonsense unless
496 // the domain was designed to be a valid DNS domain. Hence we
497 // must conclude that the RFC 1035 restriction on label length
498 // also applies to RFC 5321 domains.
500 // http://tools.ietf.org/html/rfc1035#section-2.3.4
501 // labels 63 octets or less
502 if ($element_len > 63) {
503 $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG
;
506 // CFWS is OK again now we're at the beginning of an element
507 // (although it may be obsolete CFWS)
511 $atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count] = '';
512 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= $token;
516 case ISEMAIL_STRING_OPENSQBRACKET
:
517 if ($parsedata[ISEMAIL_COMPONENT_DOMAIN
] === '') {
518 $end_or_die = true; // Domain literal must be the only component
520 $context_stack[] = $context;
521 $context = ISEMAIL_COMPONENT_LITERAL
;
522 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= $token;
523 $atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count] .= $token;
524 $parsedata[ISEMAIL_COMPONENT_LITERAL
] = '';
526 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT
; // Fatal error
530 // Folding White Space
531 case ISEMAIL_STRING_CR
:
532 case ISEMAIL_STRING_SP
:
533 case ISEMAIL_STRING_HTAB
:
534 if (($token === ISEMAIL_STRING_CR
) && ((++
$i === $raw_length)
535 ||
($email[$i] !== ISEMAIL_STRING_LF
))) {
536 $return_status[] = ISEMAIL_ERR_CR_NO_LF
;
540 if ($element_len === 0) {
541 $return_status[] = ($element_count === 0) ?
542 ISEMAIL_DEPREC_CFWS_NEAR_AT
: ISEMAIL_DEPREC_FWS
;
544 $return_status[] = ISEMAIL_CFWS_FWS
;
545 // We can't start FWS in the middle of an element, so this better be the end
549 $context_stack[] = $context;
550 $context = ISEMAIL_CONTEXT_FWS
;
551 $token_prior = $token;
555 // RFC 5322 allows any atext...
556 // http://tools.ietf.org/html/rfc5322#section-3.2.3
557 // atext = ALPHA / DIGIT / ; Printable US-ASCII
558 // "!" / "#" / ; characters not including
559 // "$" / "%" / ; specials. Used for atoms.
569 // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules (RFCs 1034 & 1123)
570 // http://tools.ietf.org/html/rfc5321#section-4.1.2
571 // sub-domain = Let-dig [Ldh-str]
573 // Let-dig = ALPHA / DIGIT
575 // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
578 // We have encountered atext where it is no longer valid
579 switch ($context_prior) {
580 case ISEMAIL_CONTEXT_COMMENT
:
581 case ISEMAIL_CONTEXT_FWS
:
582 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_CFWS
;
584 case ISEMAIL_COMPONENT_LITERAL
:
585 $return_status[] = ISEMAIL_ERR_ATEXT_AFTER_DOMLIT
;
588 die("More atext found where none is allowed, but unrecognised prior context: $context_prior");
593 $hyphen_flag = false; // Assume this token isn't a hyphen unless we discover it is
595 if (($ord < 33) ||
($ord > 126) ||
(!is_bool(strpos(ISEMAIL_STRING_SPECIALS
, $token)))) {
596 $return_status[] = ISEMAIL_ERR_EXPECTING_ATEXT
; // Fatal error
597 } elseif ($token === ISEMAIL_STRING_HYPHEN
) {
598 if ($element_len === 0) {
599 // Hyphens can't be at the beginning of a subdomain
600 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENSTART
; // Fatal error
604 } elseif (!(($ord > 47 && $ord < 58)
605 ||
($ord > 64 && $ord < 91)
606 ||
($ord > 96 && $ord < 123))) {
607 // Not an RFC 5321 subdomain, but still OK by RFC 5322
608 $return_status[] = ISEMAIL_RFC5322_DOMAIN
;
611 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= $token;
612 $atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count] .= $token;
616 //-------------------------------------------------------------
618 //-------------------------------------------------------------
619 case ISEMAIL_COMPONENT_LITERAL
:
620 // http://tools.ietf.org/html/rfc5322#section-3.4.1
621 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
623 // dtext = %d33-90 / ; Printable US-ASCII
624 // %d94-126 / ; characters not including
625 // obs-dtext ; "[", "]", or "\"
627 // obs-dtext = obs-NO-WS-CTL / quoted-pair
629 // End of domain literal
630 case ISEMAIL_STRING_CLOSESQBRACKET
:
631 if ((int) max($return_status) < ISEMAIL_DEPREC
) {
632 // Could be a valid RFC 5321 address literal, so let's check
634 // http://tools.ietf.org/html/rfc5321#section-4.1.2
635 // address-literal = "[" ( IPv4-address-literal /
636 // IPv6-address-literal /
637 // General-address-literal ) "]"
638 // ; See Section 4.1.3
640 // http://tools.ietf.org/html/rfc5321#section-4.1.3
641 // IPv4-address-literal = Snum 3("." Snum)
643 // IPv6-address-literal = "IPv6:" IPv6-addr
645 // General-address-literal = Standardized-tag ":" 1*dcontent
647 // Standardized-tag = Ldh-str
648 // ; Standardized-tag MUST be specified in a
649 // ; Standards-Track RFC and registered with IANA
651 // dcontent = %d33-90 / ; Printable US-ASCII
652 // %d94-126 ; excl. "[", "\", "]"
655 // ; representing a decimal integer
656 // ; value in the range 0 through 255
658 // IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
660 // IPv6-hex = 1*4HEXDIG
662 // IPv6-full = IPv6-hex 7(":" IPv6-hex)
664 // IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::"
665 // [IPv6-hex *5(":" IPv6-hex)]
666 // ; The "::" represents at least 2 16-bit groups of
667 // ; zeros. No more than 6 groups in addition to the
668 // ; "::" may be present.
670 // IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
672 // IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
673 // [IPv6-hex *3(":" IPv6-hex) ":"]
674 // IPv4-address-literal
675 // ; The "::" represents at least 2 16-bit groups of
676 // ; zeros. No more than 4 groups in addition to the
677 // ; "::" and IPv4-address-literal may be present.
679 // is_email() author's note: We can't use ip2long() to validate
680 // IPv4 addresses because it accepts abbreviated addresses
681 // (xxx.xxx.xxx), expanding the last group to complete the address.
682 // filter_var() validates IPv6 address inconsistently (up to PHP 5.3.3
683 // at least) -- see http://bugs.php.net/bug.php?id=53236 for example
685 $matchesIP = array();
686 /*.mixed.*/ $index = false;
687 $addressliteral = $parsedata[ISEMAIL_COMPONENT_LITERAL
];
689 // Extract IPv4 part from the end of the address-literal (if there is one)
691 '/\\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/',
695 $index = strrpos($addressliteral, $matchesIP[0]);
697 // Convert IPv4 part to IPv6 format for further testing
698 $addressliteral = substr($addressliteral, 0, $index) . '0:0';
703 // Nothing there except a valid IPv4 address, so...
704 $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL
;
705 } elseif (strncasecmp($addressliteral, ISEMAIL_STRING_IPV6TAG
, 5) !== 0) {
706 $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL
;
708 $IPv6 = substr($addressliteral, 5);
709 // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
710 $matchesIP = explode(ISEMAIL_STRING_COLON
, $IPv6);
711 $groupCount = count($matchesIP);
712 $index = strpos($IPv6, ISEMAIL_STRING_DOUBLECOLON
);
714 if ($index === false) {
715 // We need exactly the right number of groups
716 if ($groupCount !== $max_groups) {
717 $return_status[] = ISEMAIL_RFC5322_IPV6_GRPCOUNT
;
720 if ($index !== strrpos($IPv6, ISEMAIL_STRING_DOUBLECOLON
)) {
721 $return_status[] = ISEMAIL_RFC5322_IPV6_2X2XCOLON
;
723 if ($index === 0 ||
$index === (strlen($IPv6) - 2)) {
725 // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition
728 if ($groupCount > $max_groups) {
729 $return_status[] = ISEMAIL_RFC5322_IPV6_MAXGRPS
;
730 } elseif ($groupCount === $max_groups) {
731 $return_status[] = ISEMAIL_RFC5321_IPV6DEPRECATED
; // Eliding a single "::"
736 // Revision 2.7: Daniel Marschall's new IPv6 testing strategy
737 if ((substr($IPv6, 0, 1) === ISEMAIL_STRING_COLON
) && (substr($IPv6, 1, 1) !== ISEMAIL_STRING_COLON
)) {
738 $return_status[] = ISEMAIL_RFC5322_IPV6_COLONSTRT
; // Address starts with a single colon
739 } elseif ((substr($IPv6, -1) === ISEMAIL_STRING_COLON
) && (substr($IPv6, -2, 1) !== ISEMAIL_STRING_COLON
)) {
740 $return_status[] = ISEMAIL_RFC5322_IPV6_COLONEND
; // Address ends with a single colon
741 } elseif (count(preg_grep('/^[0-9A-Fa-f]{0,4}$/', $matchesIP, PREG_GREP_INVERT
)) !== 0) {
742 $return_status[] = ISEMAIL_RFC5322_IPV6_BADCHAR
; // Check for unmatched characters
744 $return_status[] = ISEMAIL_RFC5321_ADDRESSLITERAL
;
748 $return_status[] = ISEMAIL_RFC5322_DOMAINLITERAL
;
751 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= $token;
752 $atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count] .= $token;
754 $context_prior = $context;
755 $context = (int) array_pop($context_stack);
757 case ISEMAIL_STRING_BACKSLASH
:
758 $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT
;
759 $context_stack[] = $context;
760 $context = ISEMAIL_CONTEXT_QUOTEDPAIR
;
762 // Folding White Space
763 case ISEMAIL_STRING_CR
:
764 case ISEMAIL_STRING_SP
:
765 case ISEMAIL_STRING_HTAB
:
766 if (($token === ISEMAIL_STRING_CR
)
767 && ((++
$i === $raw_length) ||
($email[$i] !== ISEMAIL_STRING_LF
))) {
768 $return_status[] = ISEMAIL_ERR_CR_NO_LF
;
772 $return_status[] = ISEMAIL_CFWS_FWS
;
774 $context_stack[] = $context;
775 $context = ISEMAIL_CONTEXT_FWS
;
776 $token_prior = $token;
780 // http://tools.ietf.org/html/rfc5322#section-3.4.1
781 // dtext = %d33-90 / ; Printable US-ASCII
782 // %d94-126 / ; characters not including
783 // obs-dtext ; "[", "]", or "\"
785 // obs-dtext = obs-NO-WS-CTL / quoted-pair
787 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
788 // %d11 / ; characters that do not
789 // %d12 / ; include the carriage
790 // %d14-31 / ; return, line feed, and
791 // %d127 ; white space characters
794 // CR, LF, SP & HTAB have already been parsed above
795 if (($ord > 127) ||
($ord === 0) ||
($token === ISEMAIL_STRING_OPENSQBRACKET
)) {
796 $return_status[] = ISEMAIL_ERR_EXPECTING_DTEXT
; // Fatal error
798 } elseif (($ord < 33) ||
($ord === 127)) {
799 $return_status[] = ISEMAIL_RFC5322_DOMLIT_OBSDTEXT
;
802 $parsedata[ISEMAIL_COMPONENT_LITERAL
] .= $token;
803 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= $token;
804 $atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count] .= $token;
808 //-------------------------------------------------------------
810 //-------------------------------------------------------------
811 case ISEMAIL_CONTEXT_QUOTEDSTRING
:
812 // http://tools.ietf.org/html/rfc5322#section-3.2.4
813 // quoted-string = [CFWS]
814 // DQUOTE *([FWS] qcontent) [FWS] DQUOTE
817 // qcontent = qtext / quoted-pair
820 case ISEMAIL_STRING_BACKSLASH
:
821 $context_stack[] = $context;
822 $context = ISEMAIL_CONTEXT_QUOTEDPAIR
;
824 // Folding White Space
825 // Inside a quoted string, spaces are allowed as regular characters.
826 // It's only FWS if we include HTAB or CRLF
827 case ISEMAIL_STRING_CR
:
828 case ISEMAIL_STRING_HTAB
:
829 if (($token === ISEMAIL_STRING_CR
)
830 && ((++
$i === $raw_length) ||
($email[$i] !== ISEMAIL_STRING_LF
))) {
831 $return_status[] = ISEMAIL_ERR_CR_NO_LF
;
835 // http://tools.ietf.org/html/rfc5322#section-3.2.2
836 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
837 // structured header field are semantically interpreted as a single
840 // http://tools.ietf.org/html/rfc5322#section-3.2.4
841 // the CRLF in any FWS/CFWS that appears within the quoted-string [is]
842 // semantically "invisible" and therefore not part of the quoted-string
843 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= ISEMAIL_STRING_SP
;
844 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] .= ISEMAIL_STRING_SP
;
847 $return_status[] = ISEMAIL_CFWS_FWS
;
848 $context_stack[] = $context;
849 $context = ISEMAIL_CONTEXT_FWS
;
850 $token_prior = $token;
852 // End of quoted string
853 case ISEMAIL_STRING_DQUOTE
:
854 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= $token;
855 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] .= $token;
857 $context_prior = $context;
858 $context = (int) array_pop($context_stack);
862 // http://tools.ietf.org/html/rfc5322#section-3.2.4
863 // qtext = %d33 / ; Printable US-ASCII
864 // %d35-91 / ; characters not including
865 // %d93-126 / ; "\" or the quote character
868 // obs-qtext = obs-NO-WS-CTL
870 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
871 // %d11 / ; characters that do not
872 // %d12 / ; include the carriage
873 // %d14-31 / ; return, line feed, and
874 // %d127 ; white space characters
877 if (($ord > 127) ||
($ord === 0) ||
($ord === 10)) {
878 $return_status[] = ISEMAIL_ERR_EXPECTING_QTEXT
; // Fatal error
879 } elseif (($ord < 32) ||
($ord === 127)) {
880 $return_status[] = ISEMAIL_DEPREC_QTEXT
;
883 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= $token;
884 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] .= $token;
888 // http://tools.ietf.org/html/rfc5322#section-3.4.1
890 // string can be represented as a dot-atom (that is, it contains no
891 // characters other than atext characters or "." surrounded by atext
892 // characters), then the dot-atom form SHOULD be used and the quoted-
893 // string form SHOULD NOT be used.
896 //-------------------------------------------------------------
898 //-------------------------------------------------------------
899 case ISEMAIL_CONTEXT_QUOTEDPAIR
:
900 // http://tools.ietf.org/html/rfc5322#section-3.2.1
901 // quoted-pair = ("\" (VCHAR / WSP)) / obs-qp
903 // VCHAR = %d33-126 ; visible (printing) characters
904 // WSP = SP / HTAB ; white space
906 // obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR)
908 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
909 // %d11 / ; characters that do not
910 // %d12 / ; include the carriage
911 // %d14-31 / ; return, line feed, and
912 // %d127 ; white space characters
914 // i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
918 $return_status[] = ISEMAIL_ERR_EXPECTING_QPAIR
; // Fatal error
919 } elseif ((($ord < 31) && ($ord !== 9)) ||
($ord === 127)) {
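920 // SP & HTAB are allowed. NOTE(review): the test above uses ($ord < 31), so %d31 is not flagged as obsolete even though obs-NO-WS-CTL covers %d14-31 and the qtext/ctext checks use ($ord < 32) - confirm whether < 32 was intended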
921 $return_status[] = ISEMAIL_DEPREC_QP
;
924 // At this point we know where this qpair occurred so
925 // we could check to see if the character actually
926 // needed to be quoted at all.
927 // http://tools.ietf.org/html/rfc5321#section-4.1.2
928 // the sending system SHOULD transmit the
929 // form that uses the minimum quoting possible.
930 // To do: check whether the character needs to be quoted (escaped) in this context
931 $context_prior = $context;
932 $context = (int) array_pop($context_stack); // End of qpair
933 $token = ISEMAIL_STRING_BACKSLASH
. $token;
936 case ISEMAIL_CONTEXT_COMMENT
:
938 case ISEMAIL_CONTEXT_QUOTEDSTRING
:
939 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] .= $token;
940 $atomlist[ISEMAIL_COMPONENT_LOCALPART
][$element_count] .= $token;
942 // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
944 case ISEMAIL_COMPONENT_LITERAL
:
945 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= $token;
946 $atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count] .= $token;
948 // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash
951 die("Quoted pair logic invoked in an invalid context: $context");
955 //-------------------------------------------------------------
957 //-------------------------------------------------------------
958 case ISEMAIL_CONTEXT_COMMENT
:
959 // http://tools.ietf.org/html/rfc5322#section-3.2.2
960 // comment = "(" *([FWS] ccontent) [FWS] ")"
962 // ccontent = ctext / quoted-pair / comment
965 case ISEMAIL_STRING_OPENPARENTHESIS
:
966 // Nested comments are OK
967 $context_stack[] = $context;
968 $context = ISEMAIL_CONTEXT_COMMENT
;
971 case ISEMAIL_STRING_CLOSEPARENTHESIS
:
972 $context_prior = $context;
973 $context = (int) array_pop($context_stack);
975 // http://tools.ietf.org/html/rfc5322#section-3.2.2
976 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
977 // structured header field are semantically interpreted as a single space character.
980 // is_email() author's note: This *cannot* mean that we must add a
981 // space to the address wherever CFWS appears. This would result in
982 // any addr-spec that had CFWS outside a quoted string being invalid
984 // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
985 // $parsedata[$context] .= ISEMAIL_STRING_SP;
986 // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
992 case ISEMAIL_STRING_BACKSLASH
:
993 $context_stack[] = $context;
994 $context = ISEMAIL_CONTEXT_QUOTEDPAIR
;
996 // Folding White Space
997 case ISEMAIL_STRING_CR
:
998 case ISEMAIL_STRING_SP
:
999 case ISEMAIL_STRING_HTAB
:
1000 if (($token === ISEMAIL_STRING_CR
)
1001 && ((++
$i === $raw_length) ||
($email[$i] !== ISEMAIL_STRING_LF
))) {
1002 $return_status[] = ISEMAIL_ERR_CR_NO_LF
;
1006 $return_status[] = ISEMAIL_CFWS_FWS
;
1008 $context_stack[] = $context;
1009 $context = ISEMAIL_CONTEXT_FWS
;
1010 $token_prior = $token;
1014 // http://tools.ietf.org/html/rfc5322#section-3.2.3
1015 // ctext = %d33-39 / ; Printable US-ASCII
1016 // %d42-91 / ; characters not including
1017 // %d93-126 / ; "(", ")", or "\"
1020 // obs-ctext = obs-NO-WS-CTL
1022 // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control
1023 // %d11 / ; characters that do not
1024 // %d12 / ; include the carriage
1025 // %d14-31 / ; return, line feed, and
1026 // %d127 ; white space characters
1029 if (($ord > 127) ||
($ord === 0) ||
($ord === 10)) {
1030 $return_status[] = ISEMAIL_ERR_EXPECTING_CTEXT
; // Fatal error
1032 } elseif (($ord < 32) ||
($ord === 127)) {
1033 $return_status[] = ISEMAIL_DEPREC_CTEXT
;
1037 //-------------------------------------------------------------
1038 // Folding White Space
1039 //-------------------------------------------------------------
1040 case ISEMAIL_CONTEXT_FWS
:
1041 // http://tools.ietf.org/html/rfc5322#section-3.2.2
1042 // FWS = ([*WSP CRLF] 1*WSP) / obs-FWS
1043 // ; Folding white space
1045 // But note the erratum:
1046 // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908:
1047 // In the obsolete syntax, any amount of folding white space MAY be
1048 // inserted where the obs-FWS rule is allowed. This creates the
1049 // possibility of having two consecutive "folds" in a line, and
1050 // therefore the possibility that a line which makes up a folded header
1051 // field could be composed entirely of white space.
1053 // obs-FWS = 1*([CRLF] WSP)
1054 if ($token_prior === ISEMAIL_STRING_CR
) {
1055 if ($token === ISEMAIL_STRING_CR
) {
1056 $return_status[] = ISEMAIL_ERR_FWS_CRLF_X2
; // Fatal error
1059 if (isset($crlf_count)) {
1060 if (++
$crlf_count > 1) {
1061 $return_status[] = ISEMAIL_DEPREC_FWS
; // Multiple folds = obsolete FWS
1069 case ISEMAIL_STRING_CR
:
1070 if ((++
$i === $raw_length) ||
($email[$i] !== ISEMAIL_STRING_LF
)) {
1071 $return_status[] = ISEMAIL_ERR_CR_NO_LF
; // Fatal error
1074 case ISEMAIL_STRING_SP
:
1075 case ISEMAIL_STRING_HTAB
:
1078 if ($token_prior === ISEMAIL_STRING_CR
) {
1079 $return_status[] = ISEMAIL_ERR_FWS_CRLF_END
; // Fatal error
1083 if (isset($crlf_count)) {
1087 $context_prior = $context;
1088 $context = (int) array_pop($context_stack); // End of FWS
1090 // http://tools.ietf.org/html/rfc5322#section-3.2.2
1091 // Runs of FWS, comment, or CFWS that occur between lexical tokens in a
1092 // structured header field are semantically interpreted as a single space character.
1095 // is_email() author's note: This *cannot* mean that we must add a
1096 // space to the address wherever CFWS appears. This would result in
1097 // any addr-spec that had CFWS outside a quoted string being invalid
1099 // if (($context === ISEMAIL_COMPONENT_LOCALPART) || ($context === ISEMAIL_COMPONENT_DOMAIN)) {
1100 // $parsedata[$context] .= ISEMAIL_STRING_SP;
1101 // $atomlist[$context][$element_count] .= ISEMAIL_STRING_SP;
1105 $i--; // Look at this token again in the parent context
1108 $token_prior = $token;
1110 //-------------------------------------------------------------
1111 // A context we aren't expecting
1112 //-------------------------------------------------------------
1114 die("Unknown context: $context");
1117 //-echo "<td>$context|",(($end_or_die) ? 'true' : 'false'),"|$token|" . max($return_status) . "</td></tr>"; // debug
1118 if ((int) max($return_status) > ISEMAIL_RFC5322
) {
1119 break; // No point going on if we've got a fatal error
1123 // Some simple final tests
1124 if ((int) max($return_status) < ISEMAIL_RFC5322
) {
1125 if ($context === ISEMAIL_CONTEXT_QUOTEDSTRING
) {
1126 $return_status[] = ISEMAIL_ERR_UNCLOSEDQUOTEDSTR
; // Fatal error
1127 } elseif ($context === ISEMAIL_CONTEXT_QUOTEDPAIR
) {
1128 $return_status[] = ISEMAIL_ERR_BACKSLASHEND
; // Fatal error
1129 } elseif ($context === ISEMAIL_CONTEXT_COMMENT
) {
1130 $return_status[] = ISEMAIL_ERR_UNCLOSEDCOMMENT
; // Fatal error
1131 } elseif ($context === ISEMAIL_COMPONENT_LITERAL
) {
1132 $return_status[] = ISEMAIL_ERR_UNCLOSEDDOMLIT
; // Fatal error
1133 } elseif ($token === ISEMAIL_STRING_CR
) {
1134 $return_status[] = ISEMAIL_ERR_FWS_CRLF_END
; // Fatal error
1135 } elseif ($parsedata[ISEMAIL_COMPONENT_DOMAIN
] === '') {
1136 $return_status[] = ISEMAIL_ERR_NODOMAIN
; // Fatal error
1137 } elseif ($element_len === 0) {
1138 $return_status[] = ISEMAIL_ERR_DOT_END
; // Fatal error
1139 } elseif ($hyphen_flag) {
1140 $return_status[] = ISEMAIL_ERR_DOMAINHYPHENEND
; // Fatal error
1141 } elseif (strlen($parsedata[ISEMAIL_COMPONENT_DOMAIN
]) > 255) {
1142 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2
1143 // The maximum total length of a domain name or number is 255 octets.
1144 $return_status[] = ISEMAIL_RFC5322_DOMAIN_TOOLONG
;
1146 $parsedata[ISEMAIL_COMPONENT_LOCALPART
] . ISEMAIL_STRING_AT
. $parsedata[ISEMAIL_COMPONENT_DOMAIN
]
1148 // http://tools.ietf.org/html/rfc5321#section-4.1.2
1149 // Forward-path = Path
1151 // Path = "<" [ A-d-l ":" ] Mailbox ">"
1153 // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3
1154 // The maximum total length of a reverse-path or forward-path is 256
1155 // octets (including the punctuation and element separators).
1157 // Thus, even without (obsolete) routing information, the Mailbox can
1158 // only be 254 characters long. This is confirmed by this verified
1159 // erratum to RFC 3696:
1161 // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690
1162 // However, there is a restriction in RFC 2821 on the length of an
1163 // address in MAIL and RCPT commands of 254 characters. Since addresses
1164 // that do not fit in those fields are not normally useful, the upper
1165 // limit on address lengths should normally be considered to be 254.
1166 $return_status[] = ISEMAIL_RFC5322_TOOLONG
;
1167 } elseif ($element_len > 63) {
1168 // http://tools.ietf.org/html/rfc1035#section-2.3.4
1169 // labels 63 octets or less
1170 $return_status[] = ISEMAIL_RFC5322_LABEL_TOOLONG
;
1175 $dns_checked = false;
1177 if ($checkDNS && ((int) max($return_status) < ISEMAIL_DNSWARN
) && function_exists('dns_get_record')) {
1178 // http://tools.ietf.org/html/rfc5321#section-2.3.5
1180 // Names that can be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
1181 // in Section 5) are permitted, as are CNAME RRs whose targets can be
1182 // resolved, in turn, to MX or address RRs.
1184 // http://tools.ietf.org/html/rfc5321#section-5.1
1185 // The lookup first attempts to locate an MX record associated with the
1186 // name. If a CNAME record is found, the resulting name is processed as
1187 // if it were the initial name. ... If an empty list of MXs is returned,
1188 // the address is treated as if it was associated with an implicit MX
1189 // RR, with a preference of 0, pointing to that host.
1191 // is_email() author's note: We will regard the existence of a CNAME to be
1192 // sufficient evidence of the domain's existence. For performance reasons
1193 // we will not repeat the DNS lookup for the CNAME's target, but we will
1194 // raise a warning because we didn't immediately find an MX record.
1195 if ($element_count === 0) {
1196 $parsedata[ISEMAIL_COMPONENT_DOMAIN
] .= '.';
1197 // Checking TLD DNS seems to work only if you explicitly check from the root
1199 // Not using checkdnsrr because of a suspected bug in PHP 5.3 (http://bugs.php.net/bug.php?id=51844)
1200 $result = @dns_get_record
($parsedata[ISEMAIL_COMPONENT_DOMAIN
], DNS_MX
);
1202 if ((is_bool($result) && !(bool) $result)) {
1203 $return_status[] = ISEMAIL_DNSWARN_NO_RECORD
;
1204 // Domain can't be found in DNS
1206 if (count($result) === 0) {
1207 $return_status[] = ISEMAIL_DNSWARN_NO_MX_RECORD
; // MX-record for domain can't be found
1208 $result = @dns_get_record
($parsedata[ISEMAIL_COMPONENT_DOMAIN
], DNS_A + DNS_CNAME
);
1210 if (count($result) === 0) {
1211 $return_status[] = ISEMAIL_DNSWARN_NO_RECORD
;
1212 // No usable records for the domain can be found
1215 $dns_checked = true;
1220 // Check for TLD addresses
1221 // -----------------------
1222 // TLD addresses are specifically allowed in RFC 5321 but they are
1223 // unusual to say the least. We will allocate a separate
1224 // status to these addresses on the basis that they are more likely
1225 // to be typos than genuine addresses (unless we've already
1226 // established that the domain does have an MX record)
1228 // http://tools.ietf.org/html/rfc5321#section-2.3.5
1230 // In the case of a top-level domain used by itself in an email address, a single
1231 // string is used without any dots. This makes the requirement,
1232 // described in more detail below, that only fully-qualified domain
1233 // names appear in SMTP transactions on the public Internet,
1234 // particularly important where top-level domains are involved.
1238 // The format of TLDs has changed a number of times. The standards
1239 // used by IANA have been largely ignored by ICANN, leading to
1240 // confusion over the standards being followed. These are not defined
1241 // anywhere, except as a general component of a DNS host name (a label).
1242 // However, this could potentially lead to 123.123.123.123 being a
1243 // valid DNS name (rather than an IP address) and thereby creating
1244 // an ambiguity. The most authoritative statement on TLD formats that
1245 // the author can find is in a (rejected!) erratum to RFC 1123
1246 // submitted by John Klensin, the author of RFC 5321:
1248 // http://www.rfc-editor.org/errata_search.php?rfc=1123&eid=1353
1249 // However, a valid host name can never have the dotted-decimal
1250 // form #.#.#.#, since this change does not permit the highest-level
1251 // component label to start with a digit even if it is not all-numeric.
1252 if (!$dns_checked && ((int) max($return_status) < ISEMAIL_DNSWARN
)) {
1253 if ($element_count === 0) {
1254 $return_status[] = ISEMAIL_RFC5321_TLD
;
1257 if (is_numeric($atomlist[ISEMAIL_COMPONENT_DOMAIN
][$element_count][0])) {
1258 $return_status[] = ISEMAIL_RFC5321_TLDNUMERIC
;
1262 $return_status = array_unique($return_status);
1263 $final_status = (int) max($return_status);
1265 if (count($return_status) !== 1) {
1266 array_shift($return_status); // remove redundant ISEMAIL_VALID
1269 $parsedata['status'] = $return_status;
1271 if ($final_status < $threshold) {
1272 $final_status = ISEMAIL_VALID
;
1275 return ($diagnose) ?
$final_status : ($final_status < ISEMAIL_THRESHOLD
);