'use strict';

// Load modules

const Punycode = require('punycode');
const Util = require('util');


// Declare internals

const internals = {
    hasOwn: Object.prototype.hasOwnProperty,
    indexOf: Array.prototype.indexOf,
    defaultThreshold: 16,
    maxIPv6Groups: 8,

    // Category boundaries that diagnosis codes are compared against

    categories: {
        valid: 1,
        dnsWarn: 7,
        rfc5321: 15,
        cfws: 31,
        deprecated: 63,
        rfc5322: 127,
        error: 255
    },

    diagnoses: {

        // Address is valid

        valid: 0,

        // Address is valid for SMTP but has unusual elements

        rfc5321TLD: 9,
        rfc5321TLDNumeric: 10,
        rfc5321QuotedString: 11,
        rfc5321AddressLiteral: 12,

        // Address is valid for message, but must be modified for envelope

        cfwsComment: 17,
        cfwsFWS: 18,

        // Address contains non-ASCII when the allowUnicode option is false
        // Has to be > internals.defaultThreshold so that it's rejected
        // without an explicit errorLevel:

        undesiredNonAscii: 25,

        // Address contains deprecated elements, but may still be valid in some contexts

        deprecatedLocalPart: 33,
        deprecatedFWS: 34,
        deprecatedQTEXT: 35,
        deprecatedQP: 36,
        deprecatedComment: 37,
        deprecatedCTEXT: 38,
        deprecatedIPv6: 39,
        deprecatedCFWSNearAt: 49,

        // Address is only valid according to broad definition in RFC 5322, but is otherwise invalid

        rfc5322Domain: 65,
        rfc5322TooLong: 66,
        rfc5322LocalTooLong: 67,
        rfc5322DomainTooLong: 68,
        rfc5322LabelTooLong: 69,
        rfc5322DomainLiteral: 70,
        rfc5322DomainLiteralOBSDText: 71,
        rfc5322IPv6GroupCount: 72,
        rfc5322IPv62x2xColon: 73,
        rfc5322IPv6BadCharacter: 74,
        rfc5322IPv6MaxGroups: 75,
        rfc5322IPv6ColonStart: 76,
        rfc5322IPv6ColonEnd: 77,

        // Address is invalid for any purpose

        errExpectingDTEXT: 129,
        errNoLocalPart: 130,
        errNoDomain: 131,
        errConsecutiveDots: 132,
        errATEXTAfterCFWS: 133,
        errATEXTAfterQS: 134,
        errATEXTAfterDomainLiteral: 135,
        errExpectingQPair: 136,
        errExpectingATEXT: 137,
        errExpectingQTEXT: 138,
        errExpectingCTEXT: 139,
        errBackslashEnd: 140,
        errDotStart: 141,
        errDotEnd: 142,
        errDomainHyphenStart: 143,
        errDomainHyphenEnd: 144,
        errUnclosedQuotedString: 145,
        errUnclosedComment: 146,
        errUnclosedDomainLiteral: 147,
        errFWSCRLFx2: 148,
        errFWSCRLFEnd: 149,
        errCRNoLF: 150,
        errUnknownTLD: 160,
        errDomainTooShort: 161,
        errDotAfterDomainLiteral: 162
    },

    // Parser contexts

    components: {
        localpart: 0,
        domain: 1,
        literal: 2,
        contextComment: 3,
        contextFWS: 4,
        contextQuotedString: 5,
        contextQuotedPair: 6
    }
};


/**
 * Check whether the given code point is one of the RFC 5322 "specials".
 *
 * The lookup table only covers code points below 0x100; anything outside of
 * it is reported as `false` rather than `undefined`.
 *
 * @param {number} code The code point to check.
 * @returns {boolean} Whether the code point is a special character.
 */
internals.specials = function () {

    const specials = '()<>[]:;@\\,."';   // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3)
    const lookup = new Array(0x100);
    lookup.fill(false);

    for (let i = 0; i < specials.length; ++i) {
        lookup[specials.codePointAt(i)] = true;
    }

    return function (code) {

        return lookup[code] === true;
    };
}();


/**
 * Check whether the given code point is in the range 0-32, i.e. a C0 control
 * character or the space character.
 *
 * @param {number} code The code point to check.
 * @returns {boolean} Whether the code point falls in the 0-32 range.
 */
internals.c0Controls = function () {

    const lookup = new Array(0x100);
    lookup.fill(false);

    // add C0 control characters

    for (let i = 0; i < 33; ++i) {
        lookup[i] = true;
    }

    return function (code) {

        return lookup[code] === true;
    };
}();


/**
 * Check whether the given code point is in the range 127-159, i.e. DEL or a
 * C1 control character.
 *
 * @param {number} code The code point to check.
 * @returns {boolean} Whether the code point falls in the 127-159 range.
 */
internals.c1Controls = function () {

    const lookup = new Array(0x100);
    lookup.fill(false);

    // add C1 control characters

    for (let i = 127; i < 160; ++i) {
        lookup[i] = true;
    }

    return function (code) {

        return lookup[code] === true;
    };
}();


internals.regex = {

    // Dotted-quad IPv4 address anchored at the end of the string
    ipV4: /\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$/,

    // A single IPv6 group: up to four hex digits (empty is accepted)
    ipV6: /^[a-fA-F\d]{0,4}$/
};


// Whether String.prototype.normalize() leaves NUL characters untouched on this
// runtime

internals.normalizeSupportsNul = '\0'.normalize('NFC') === '\0';


// $lab:coverage:off$
/**
 * NFC-normalize the given string piecewise around NUL characters, so that the
 * NUL characters themselves never pass through normalize().
 *
 * @param {string} email The string to normalize.
 * @returns {string} The normalized string.
 */
internals.nulNormalize = function (email) {

    return email.split('\0').map((part) => part.normalize('NFC')).join('\0');
};
// $lab:coverage:on$


/**
 * NFC-normalize the given string.
 *
 * @param {string} email The string to normalize.
 * @returns {string} The normalized string.
 */
internals.normalize = function (email) {

    return email.normalize('NFC');
};


// $lab:coverage:off$
if (!internals.normalizeSupportsNul) {
    internals.normalize = function (email) {

        if (email.indexOf('\0') >= 0) {
            return internals.nulNormalize(email);
        }

        return email.normalize('NFC');
    };
}
// $lab:coverage:on$


/**
 * Check whether every item is a syntactically acceptable IPv6 group.
 *
 * @param {string[]} items The groups to check.
 * @returns {boolean} Whether all groups match internals.regex.ipV6.
 */
internals.checkIpV6 = function (items) {

    return items.every((value) => internals.regex.ipV6.test(value));
};


internals.isIterable = Array.isArray;

/* $lab:coverage:off$ */
if (typeof Symbol !== 'undefined') {
    internals.isIterable = (value) => Array.isArray(value) || (!!value && typeof value === 'object' && typeof value[Symbol.iterator] === 'function');
}
/* $lab:coverage:on$ */


// Node 10 introduced isSet and isMap, which are useful for cross-context type
// checking.
// $lab:coverage:off$
internals._isSet = (value) => value instanceof Set;
internals._isMap = (value) => value instanceof Map;
internals.isSet = Util.types && Util.types.isSet || internals._isSet;
internals.isMap = Util.types && Util.types.isMap || internals._isMap;
// $lab:coverage:on$


/**
 * Normalize the given lookup "table" to an iterator. Outputs items in arrays
 * and sets, keys from maps (regardless of the corresponding value), and own
 * enumerable keys from all other objects (intended to be plain objects).
 *
 * @param {*} table The table to convert.
 * @returns {Iterable<*>} The converted table.
 */
internals.normalizeTable = function (table) {

    if (internals.isSet(table) || Array.isArray(table)) {
        return table;
    }

    if (internals.isMap(table)) {
        return table.keys();
    }

    return Object.keys(table);
};


/**
 * Convert the given domain atom to its canonical form using Nameprep and string
 * lowercasing. Domain atoms that are all-ASCII will not undergo any changes via
 * Nameprep, and domain atoms that have already been canonicalized will not be
 * altered.
 *
 * @param {string} atom The atom to canonicalize.
 * @returns {string} The canonicalized atom.
 */
internals.canonicalizeAtom = function (atom) {

    return Punycode.toASCII(atom).toLowerCase();
};


/**
 * Check whether any of the values in the given iterable, when passed through
 * the iteratee function, are equal to the given value.
 *
 * @param {Iterable<*>} iterable The iterable to check.
 * @param {function(*): *} iteratee The iteratee that receives each item from
 *   the iterable.
 * @param {*} value The reference value.
 * @returns {boolean} Whether the given value matches any of the items in the
 *   iterable per the iteratee.
 */
internals.includesMapped = function (iterable, iteratee, value) {

    for (const item of iterable) {
        if (value === iteratee(item)) {
            return true;
        }
    }

    return false;
};


/**
 * Check whether the given top-level domain atom is valid based on the
 * configured blacklist/whitelist. When both are configured the blacklist takes
 * precedence; when neither is configured there is nothing to restrict and the
 * atom is considered valid.
 *
 * @param {string} tldAtom The atom to check.
 * @param {Object} options
 *   {*} tldBlacklist The set of domains to consider invalid.
 *   {*} tldWhitelist The set of domains to consider valid.
 * @returns {boolean} Whether the given domain atom is valid per the blacklist/
 *   whitelist.
 */
internals.validDomain = function (tldAtom, options) {

    // Nameprep handles case-sensitive unicode stuff, but doesn't touch
    // uppercase ASCII characters.
    const canonicalTldAtom = internals.canonicalizeAtom(tldAtom);

    if (options.tldBlacklist) {
        return !internals.includesMapped(
            internals.normalizeTable(options.tldBlacklist),
            internals.canonicalizeAtom, canonicalTldAtom);
    }

    // Without this guard Object.keys(undefined) would throw below.
    if (!options.tldWhitelist) {
        return true;
    }

    return internals.includesMapped(
        internals.normalizeTable(options.tldWhitelist),
        internals.canonicalizeAtom, canonicalTldAtom);
};


/**
 * Check whether the domain atoms has an address literal part followed by a
 * normal domain atom part. For example, [127.0.0.1].com.
 *
 * @param {string[]} domainAtoms The parsed domain atoms.
 * @returns {boolean} Whether there exists both a normal domain atom and an
 *   address literal.
 */
internals.hasDomainLiteralThenAtom = function (domainAtoms) {

    let hasDomainLiteral = false;
    for (let i = 0; i < domainAtoms.length; ++i) {
        if (domainAtoms[i][0] === '[') {
            hasDomainLiteral = true;
        }
        else if (hasDomainLiteral) {
            return true;
        }
    }

    return false;
};


/**
 * Check that an email address conforms to RFCs 5321, 5322, 6530 and others
 *
 * We distinguish clearly between a Mailbox as defined by RFC 5321 and an
 * addr-spec as defined by RFC 5322. Depending on the context, either can be
 * regarded as a valid email address.
 */
The RFC 5321 Mailbox specification is * more restrictive (comments, white space and obsolete forms are not allowed). * * @param {string} email The email address to check. See README for specifics. * @param {Object} options The (optional) options: * {*} errorLevel Determines the boundary between valid and invalid * addresses. * {*} tldBlacklist The set of domains to consider invalid. * {*} tldWhitelist The set of domains to consider valid. * {*} allowUnicode Whether to allow non-ASCII characters, defaults to true. * {*} minDomainAtoms The minimum number of domain atoms which must be present * for the address to be valid. * @param {function(number|boolean)} callback The (optional) callback handler. * @return {*} */ exports.validate = internals.validate = function (email, options, callback) { options = options || {}; if (typeof email !== 'string') { throw new TypeError('expected string email'); } email = internals.normalize(email); // The callback function is deprecated. // $lab:coverage:off$ if (typeof options === 'function') { callback = options; options = {}; } if (typeof callback !== 'function') { callback = null; } // $lab:coverage:on$ let diagnose; let threshold; if (typeof options.errorLevel === 'number') { diagnose = true; threshold = options.errorLevel; } else { diagnose = !!options.errorLevel; threshold = internals.diagnoses.valid; } if (options.tldWhitelist) { if (typeof options.tldWhitelist === 'string') { options.tldWhitelist = [options.tldWhitelist]; } else if (typeof options.tldWhitelist !== 'object') { throw new TypeError('expected array or object tldWhitelist'); } } if (options.tldBlacklist) { if (typeof options.tldBlacklist === 'string') { options.tldBlacklist = [options.tldBlacklist]; } else if (typeof options.tldBlacklist !== 'object') { throw new TypeError('expected array or object tldBlacklist'); } } if (options.minDomainAtoms && (options.minDomainAtoms !== ((+options.minDomainAtoms) | 0) || options.minDomainAtoms < 0)) { throw new 
TypeError('expected positive integer minDomainAtoms'); } // Normalize the set of excluded diagnoses. if (options.excludeDiagnoses) { if (!internals.isIterable(options.excludeDiagnoses)) { throw new TypeError('expected iterable excludeDiagnoses'); } // This won't catch cross-realm Sets pre-Node 10, but it will cast the // value to an in-realm Set representation. if (!internals.isSet(options.excludeDiagnoses)) { options.excludeDiagnoses = new Set(options.excludeDiagnoses); } } let maxResult = internals.diagnoses.valid; const updateResult = (value) => { if (value > maxResult && (!options.excludeDiagnoses || !options.excludeDiagnoses.has(value))) { maxResult = value; } }; const allowUnicode = options.allowUnicode === undefined || !!options.allowUnicode; if (!allowUnicode && /[^\x00-\x7f]/.test(email)) { updateResult(internals.diagnoses.undesiredNonAscii); } const context = { now: internals.components.localpart, prev: internals.components.localpart, stack: [internals.components.localpart] }; let prevToken = ''; const parseData = { local: '', domain: '' }; const atomData = { locals: [''], domains: [''] }; let elementCount = 0; let elementLength = 0; let crlfCount = 0; let charCode; let hyphenFlag = false; let assertEnd = false; const emailLength = email.length; let token; // Token is used outside the loop, must declare similarly for (let i = 0; i < emailLength; i += token.length) { // Utilize codepoints to account for Unicode surrogate pairs token = String.fromCodePoint(email.codePointAt(i)); switch (context.now) { // Local-part case internals.components.localpart: // http://tools.ietf.org/html/rfc5322#section-3.4.1 // local-part = dot-atom / quoted-string / obs-local-part // // dot-atom = [CFWS] dot-atom-text [CFWS] // // dot-atom-text = 1*atext *("." 1*atext) // // quoted-string = [CFWS] // DQUOTE *([FWS] qcontent) [FWS] DQUOTE // [CFWS] // // obs-local-part = word *("." 
word) // // word = atom / quoted-string // // atom = [CFWS] 1*atext [CFWS] switch (token) { // Comment case '(': if (elementLength === 0) { // Comments are OK at the beginning of an element updateResult(elementCount === 0 ? internals.diagnoses.cfwsComment : internals.diagnoses.deprecatedComment); } else { updateResult(internals.diagnoses.cfwsComment); // Cannot start a comment in an element, should be end assertEnd = true; } context.stack.push(context.now); context.now = internals.components.contextComment; break; // Next dot-atom element case '.': if (elementLength === 0) { // Another dot, already? updateResult(elementCount === 0 ? internals.diagnoses.errDotStart : internals.diagnoses.errConsecutiveDots); } else { // The entire local-part can be a quoted string for RFC 5321; if one atom is quoted it's an RFC 5322 obsolete form if (assertEnd) { updateResult(internals.diagnoses.deprecatedLocalPart); } // CFWS & quoted strings are OK again now we're at the beginning of an element (although they are obsolete forms) assertEnd = false; elementLength = 0; ++elementCount; parseData.local += token; atomData.locals[elementCount] = ''; } break; // Quoted string case '"': if (elementLength === 0) { // The entire local-part can be a quoted string for RFC 5321; if one atom is quoted it's an RFC 5322 obsolete form updateResult(elementCount === 0 ? 
internals.diagnoses.rfc5321QuotedString : internals.diagnoses.deprecatedLocalPart); parseData.local += token; atomData.locals[elementCount] += token; elementLength += Buffer.byteLength(token, 'utf8'); // Quoted string must be the entire element assertEnd = true; context.stack.push(context.now); context.now = internals.components.contextQuotedString; } else { updateResult(internals.diagnoses.errExpectingATEXT); } break; // Folding white space case '\r': if (emailLength === ++i || email[i] !== '\n') { // Fatal error updateResult(internals.diagnoses.errCRNoLF); break; } // Fallthrough case ' ': case '\t': if (elementLength === 0) { updateResult(elementCount === 0 ? internals.diagnoses.cfwsFWS : internals.diagnoses.deprecatedFWS); } else { // We can't start FWS in the middle of an element, better be end assertEnd = true; } context.stack.push(context.now); context.now = internals.components.contextFWS; prevToken = token; break; case '@': // At this point we should have a valid local-part // $lab:coverage:off$ if (context.stack.length !== 1) { throw new Error('unexpected item on context stack'); } // $lab:coverage:on$ if (parseData.local.length === 0) { // Fatal error updateResult(internals.diagnoses.errNoLocalPart); } else if (elementLength === 0) { // Fatal error updateResult(internals.diagnoses.errDotEnd); } // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.1 the maximum total length of a user name or other local-part is 64 // octets else if (Buffer.byteLength(parseData.local, 'utf8') > 64) { updateResult(internals.diagnoses.rfc5322LocalTooLong); } // http://tools.ietf.org/html/rfc5322#section-3.4.1 comments and folding white space SHOULD NOT be used around "@" in the // addr-spec // // http://tools.ietf.org/html/rfc2119 // 4. 
SHOULD NOT this phrase, or the phrase "NOT RECOMMENDED" mean that there may exist valid reasons in particular // circumstances when the particular behavior is acceptable or even useful, but the full implications should be understood // and the case carefully weighed before implementing any behavior described with this label. else if (context.prev === internals.components.contextComment || context.prev === internals.components.contextFWS) { updateResult(internals.diagnoses.deprecatedCFWSNearAt); } // Clear everything down for the domain parsing context.now = internals.components.domain; context.stack[0] = internals.components.domain; elementCount = 0; elementLength = 0; assertEnd = false; // CFWS can only appear at the end of the element break; // ATEXT default: // http://tools.ietf.org/html/rfc5322#section-3.2.3 // atext = ALPHA / DIGIT / ; Printable US-ASCII // "!" / "#" / ; characters not including // "$" / "%" / ; specials. Used for atoms. // "&" / "'" / // "*" / "+" / // "-" / "/" / // "=" / "?" 
/ // "^" / "_" / // "`" / "{" / // "|" / "}" / // "~" if (assertEnd) { // We have encountered atext where it is no longer valid switch (context.prev) { case internals.components.contextComment: case internals.components.contextFWS: updateResult(internals.diagnoses.errATEXTAfterCFWS); break; case internals.components.contextQuotedString: updateResult(internals.diagnoses.errATEXTAfterQS); break; // $lab:coverage:off$ default: throw new Error('more atext found where none is allowed, but unrecognized prev context: ' + context.prev); // $lab:coverage:on$ } } else { context.prev = context.now; charCode = token.codePointAt(0); // Especially if charCode == 10 if (internals.specials(charCode) || internals.c0Controls(charCode) || internals.c1Controls(charCode)) { // Fatal error updateResult(internals.diagnoses.errExpectingATEXT); } parseData.local += token; atomData.locals[elementCount] += token; elementLength += Buffer.byteLength(token, 'utf8'); } } break; case internals.components.domain: // http://tools.ietf.org/html/rfc5322#section-3.4.1 // domain = dot-atom / domain-literal / obs-domain // // dot-atom = [CFWS] dot-atom-text [CFWS] // // dot-atom-text = 1*atext *("." 1*atext) // // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] // // dtext = %d33-90 / ; Printable US-ASCII // %d94-126 / ; characters not including // obs-dtext ; "[", "]", or "\" // // obs-domain = atom *("." atom) // // atom = [CFWS] 1*atext [CFWS] // http://tools.ietf.org/html/rfc5321#section-4.1.2 // Mailbox = Local-part "@" ( Domain / address-literal ) // // Domain = sub-domain *("." sub-domain) // // address-literal = "[" ( IPv4-address-literal / // IPv6-address-literal / // General-address-literal ) "]" // ; See Section 4.1.3 // http://tools.ietf.org/html/rfc5322#section-3.4.1 // Note: A liberal syntax for the domain portion of addr-spec is // given here. 
However, the domain portion contains addressing // information specified by and used in other protocols (e.g., // [RFC1034], [RFC1035], [RFC1123], [RFC5321]). It is therefore // incumbent upon implementations to conform to the syntax of // addresses for the context in which they are used. // // is_email() author's note: it's not clear how to interpret this in // he context of a general email address validator. The conclusion I // have reached is this: "addressing information" must comply with // RFC 5321 (and in turn RFC 1035), anything that is "semantically // invisible" must comply only with RFC 5322. switch (token) { // Comment case '(': if (elementLength === 0) { // Comments at the start of the domain are deprecated in the text, comments at the start of a subdomain are obs-domain // http://tools.ietf.org/html/rfc5322#section-3.4.1 updateResult(elementCount === 0 ? internals.diagnoses.deprecatedCFWSNearAt : internals.diagnoses.deprecatedComment); } else { // We can't start a comment mid-element, better be at the end assertEnd = true; updateResult(internals.diagnoses.cfwsComment); } context.stack.push(context.now); context.now = internals.components.contextComment; break; // Next dot-atom element case '.': const punycodeLength = Punycode.toASCII(atomData.domains[elementCount]).length; if (elementLength === 0) { // Another dot, already? Fatal error. updateResult(elementCount === 0 ? internals.diagnoses.errDotStart : internals.diagnoses.errConsecutiveDots); } else if (hyphenFlag) { // Previous subdomain ended in a hyphen. Fatal error. updateResult(internals.diagnoses.errDomainHyphenEnd); } else if (punycodeLength > 63) { // RFC 5890 specifies that domain labels that are encoded using the Punycode algorithm // must adhere to the <= 63 octet requirement. // This includes string prefixes from the Punycode algorithm. 
// // https://tools.ietf.org/html/rfc5890#section-2.3.2.1 // labels 63 octets or less updateResult(internals.diagnoses.rfc5322LabelTooLong); } // CFWS is OK again now we're at the beginning of an element (although // it may be obsolete CFWS) assertEnd = false; elementLength = 0; ++elementCount; atomData.domains[elementCount] = ''; parseData.domain += token; break; // Domain literal case '[': if (atomData.domains[elementCount].length === 0) { if (parseData.domain.length) { // Domain literal interspersed with domain refs. updateResult(internals.diagnoses.errDotAfterDomainLiteral); } assertEnd = true; elementLength += Buffer.byteLength(token, 'utf8'); context.stack.push(context.now); context.now = internals.components.literal; parseData.domain += token; atomData.domains[elementCount] += token; parseData.literal = ''; } else { // Fatal error updateResult(internals.diagnoses.errExpectingATEXT); } break; // Folding white space case '\r': if (emailLength === ++i || email[i] !== '\n') { // Fatal error updateResult(internals.diagnoses.errCRNoLF); break; } // Fallthrough case ' ': case '\t': if (elementLength === 0) { updateResult(elementCount === 0 ? internals.diagnoses.deprecatedCFWSNearAt : internals.diagnoses.deprecatedFWS); } else { // We can't start FWS in the middle of an element, so this better be the end updateResult(internals.diagnoses.cfwsFWS); assertEnd = true; } context.stack.push(context.now); context.now = internals.components.contextFWS; prevToken = token; break; // This must be ATEXT default: // RFC 5322 allows any atext... // http://tools.ietf.org/html/rfc5322#section-3.2.3 // atext = ALPHA / DIGIT / ; Printable US-ASCII // "!" / "#" / ; characters not including // "$" / "%" / ; specials. Used for atoms. // "&" / "'" / // "*" / "+" / // "-" / "/" / // "=" / "?" 
/ // "^" / "_" / // "`" / "{" / // "|" / "}" / // "~" // But RFC 5321 only allows letter-digit-hyphen to comply with DNS rules // (RFCs 1034 & 1123) // http://tools.ietf.org/html/rfc5321#section-4.1.2 // sub-domain = Let-dig [Ldh-str] // // Let-dig = ALPHA / DIGIT // // Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig // if (assertEnd) { // We have encountered ATEXT where it is no longer valid switch (context.prev) { case internals.components.contextComment: case internals.components.contextFWS: updateResult(internals.diagnoses.errATEXTAfterCFWS); break; case internals.components.literal: updateResult(internals.diagnoses.errATEXTAfterDomainLiteral); break; // $lab:coverage:off$ default: throw new Error('more atext found where none is allowed, but unrecognized prev context: ' + context.prev); // $lab:coverage:on$ } } charCode = token.codePointAt(0); // Assume this token isn't a hyphen unless we discover it is hyphenFlag = false; if (internals.specials(charCode) || internals.c0Controls(charCode) || internals.c1Controls(charCode)) { // Fatal error updateResult(internals.diagnoses.errExpectingATEXT); } else if (token === '-') { if (elementLength === 0) { // Hyphens cannot be at the beginning of a subdomain, fatal error updateResult(internals.diagnoses.errDomainHyphenStart); } hyphenFlag = true; } // Check if it's a neither a number nor a latin/unicode letter else if (charCode < 48 || (charCode > 122 && charCode < 192) || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) { // This is not an RFC 5321 subdomain, but still OK by RFC 5322 updateResult(internals.diagnoses.rfc5322Domain); } parseData.domain += token; atomData.domains[elementCount] += token; elementLength += Buffer.byteLength(token, 'utf8'); } break; // Domain literal case internals.components.literal: // http://tools.ietf.org/html/rfc5322#section-3.4.1 // domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS] // // dtext = %d33-90 / ; Printable US-ASCII // %d94-126 / ; characters not 
including // obs-dtext ; "[", "]", or "\" // // obs-dtext = obs-NO-WS-CTL / quoted-pair switch (token) { // End of domain literal case ']': if (maxResult < internals.categories.deprecated) { // Could be a valid RFC 5321 address literal, so let's check // http://tools.ietf.org/html/rfc5321#section-4.1.2 // address-literal = "[" ( IPv4-address-literal / // IPv6-address-literal / // General-address-literal ) "]" // ; See Section 4.1.3 // // http://tools.ietf.org/html/rfc5321#section-4.1.3 // IPv4-address-literal = Snum 3("." Snum) // // IPv6-address-literal = "IPv6:" IPv6-addr // // General-address-literal = Standardized-tag ":" 1*dcontent // // Standardized-tag = Ldh-str // ; Standardized-tag MUST be specified in a // ; Standards-Track RFC and registered with IANA // // dcontent = %d33-90 / ; Printable US-ASCII // %d94-126 ; excl. "[", "\", "]" // // Snum = 1*3DIGIT // ; representing a decimal integer // ; value in the range 0 through 255 // // IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp // // IPv6-hex = 1*4HEXDIG // // IPv6-full = IPv6-hex 7(":" IPv6-hex) // // IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::" // [IPv6-hex *5(":" IPv6-hex)] // ; The "::" represents at least 2 16-bit groups of // ; zeros. No more than 6 groups in addition to the // ; "::" may be present. // // IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal // // IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::" // [IPv6-hex *3(":" IPv6-hex) ":"] // IPv4-address-literal // ; The "::" represents at least 2 16-bit groups of // ; zeros. No more than 4 groups in addition to the // ; "::" and IPv4-address-literal may be present. 
let index = -1; let addressLiteral = parseData.literal; const matchesIP = internals.regex.ipV4.exec(addressLiteral); // Maybe extract IPv4 part from the end of the address-literal if (matchesIP) { index = matchesIP.index; if (index !== 0) { // Convert IPv4 part to IPv6 format for futher testing addressLiteral = addressLiteral.slice(0, index) + '0:0'; } } if (index === 0) { // Nothing there except a valid IPv4 address, so... updateResult(internals.diagnoses.rfc5321AddressLiteral); } else if (addressLiteral.slice(0, 5).toLowerCase() !== 'ipv6:') { updateResult(internals.diagnoses.rfc5322DomainLiteral); } else { const match = addressLiteral.slice(5); let maxGroups = internals.maxIPv6Groups; const groups = match.split(':'); index = match.indexOf('::'); if (!~index) { // Need exactly the right number of groups if (groups.length !== maxGroups) { updateResult(internals.diagnoses.rfc5322IPv6GroupCount); } } else if (index !== match.lastIndexOf('::')) { updateResult(internals.diagnoses.rfc5322IPv62x2xColon); } else { if (index === 0 || index === match.length - 2) { // RFC 4291 allows :: at the start or end of an address with 7 other groups in addition ++maxGroups; } if (groups.length > maxGroups) { updateResult(internals.diagnoses.rfc5322IPv6MaxGroups); } else if (groups.length === maxGroups) { // Eliding a single "::" updateResult(internals.diagnoses.deprecatedIPv6); } } // IPv6 testing strategy if (match[0] === ':' && match[1] !== ':') { updateResult(internals.diagnoses.rfc5322IPv6ColonStart); } else if (match[match.length - 1] === ':' && match[match.length - 2] !== ':') { updateResult(internals.diagnoses.rfc5322IPv6ColonEnd); } else if (internals.checkIpV6(groups)) { updateResult(internals.diagnoses.rfc5321AddressLiteral); } else { updateResult(internals.diagnoses.rfc5322IPv6BadCharacter); } } } else { updateResult(internals.diagnoses.rfc5322DomainLiteral); } parseData.domain += token; atomData.domains[elementCount] += token; elementLength += Buffer.byteLength(token, 
'utf8'); context.prev = context.now; context.now = context.stack.pop(); break; case '\\': updateResult(internals.diagnoses.rfc5322DomainLiteralOBSDText); context.stack.push(context.now); context.now = internals.components.contextQuotedPair; break; // Folding white space case '\r': if (emailLength === ++i || email[i] !== '\n') { updateResult(internals.diagnoses.errCRNoLF); break; } // Fallthrough case ' ': case '\t': updateResult(internals.diagnoses.cfwsFWS); context.stack.push(context.now); context.now = internals.components.contextFWS; prevToken = token; break; // DTEXT default: // http://tools.ietf.org/html/rfc5322#section-3.4.1 // dtext = %d33-90 / ; Printable US-ASCII // %d94-126 / ; characters not including // obs-dtext ; "[", "]", or "\" // // obs-dtext = obs-NO-WS-CTL / quoted-pair // // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control // %d11 / ; characters that do not // %d12 / ; include the carriage // %d14-31 / ; return, line feed, and // %d127 ; white space characters charCode = token.codePointAt(0); // '\r', '\n', ' ', and '\t' have already been parsed above if ((charCode !== 127 && internals.c1Controls(charCode)) || charCode === 0 || token === '[') { // Fatal error updateResult(internals.diagnoses.errExpectingDTEXT); break; } else if (internals.c0Controls(charCode) || charCode === 127) { updateResult(internals.diagnoses.rfc5322DomainLiteralOBSDText); } parseData.literal += token; parseData.domain += token; atomData.domains[elementCount] += token; elementLength += Buffer.byteLength(token, 'utf8'); } break; // Quoted string case internals.components.contextQuotedString: // http://tools.ietf.org/html/rfc5322#section-3.2.4 // quoted-string = [CFWS] // DQUOTE *([FWS] qcontent) [FWS] DQUOTE // [CFWS] // // qcontent = qtext / quoted-pair switch (token) { // Quoted pair case '\\': context.stack.push(context.now); context.now = internals.components.contextQuotedPair; break; // Folding white space. 
Spaces are allowed as regular characters inside a quoted string - it's only FWS if we include '\t' or '\r\n' case '\r': if (emailLength === ++i || email[i] !== '\n') { // Fatal error updateResult(internals.diagnoses.errCRNoLF); break; } // Fallthrough case '\t': // http://tools.ietf.org/html/rfc5322#section-3.2.2 // Runs of FWS, comment, or CFWS that occur between lexical tokens in // a structured header field are semantically interpreted as a single // space character. // http://tools.ietf.org/html/rfc5322#section-3.2.4 // the CRLF in any FWS/CFWS that appears within the quoted-string [is] // semantically "invisible" and therefore not part of the // quoted-string parseData.local += ' '; atomData.locals[elementCount] += ' '; elementLength += Buffer.byteLength(token, 'utf8'); updateResult(internals.diagnoses.cfwsFWS); context.stack.push(context.now); context.now = internals.components.contextFWS; prevToken = token; break; // End of quoted string case '"': parseData.local += token; atomData.locals[elementCount] += token; elementLength += Buffer.byteLength(token, 'utf8'); context.prev = context.now; context.now = context.stack.pop(); break; // QTEXT default: // http://tools.ietf.org/html/rfc5322#section-3.2.4 // qtext = %d33 / ; Printable US-ASCII // %d35-91 / ; characters not including // %d93-126 / ; "\" or the quote character // obs-qtext // // obs-qtext = obs-NO-WS-CTL // // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control // %d11 / ; characters that do not // %d12 / ; include the carriage // %d14-31 / ; return, line feed, and // %d127 ; white space characters charCode = token.codePointAt(0); if ((charCode !== 127 && internals.c1Controls(charCode)) || charCode === 0 || charCode === 10) { updateResult(internals.diagnoses.errExpectingQTEXT); } else if (internals.c0Controls(charCode) || charCode === 127) { updateResult(internals.diagnoses.deprecatedQTEXT); } parseData.local += token; atomData.locals[elementCount] += token; elementLength += Buffer.byteLength(token, 'utf8'); 
} // http://tools.ietf.org/html/rfc5322#section-3.4.1 // If the string can be represented as a dot-atom (that is, it contains // no characters other than atext characters or "." surrounded by atext // characters), then the dot-atom form SHOULD be used and the quoted- // string form SHOULD NOT be used. break; // Quoted pair case internals.components.contextQuotedPair: // http://tools.ietf.org/html/rfc5322#section-3.2.1 // quoted-pair = ("\" (VCHAR / WSP)) / obs-qp // // VCHAR = %d33-126 ; visible (printing) characters // WSP = SP / HTAB ; white space // // obs-qp = "\" (%d0 / obs-NO-WS-CTL / LF / CR) // // obs-NO-WS-CTL = %d1-8 / ; US-ASCII control // %d11 / ; characters that do not // %d12 / ; include the carriage // %d14-31 / ; return, line feed, and // %d127 ; white space characters // // i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127) charCode = token.codePointAt(0); if (charCode !== 127 && internals.c1Controls(charCode)) { // Fatal error updateResult(internals.diagnoses.errExpectingQPair); } else if ((charCode < 31 && charCode !== 9) || charCode === 127) { // ' ' and '\t' are allowed updateResult(internals.diagnoses.deprecatedQP); } // At this point we know where this qpair occurred so we could check to see if the character actually needed to be quoted at all. // http://tools.ietf.org/html/rfc5321#section-4.1.2 // the sending system SHOULD transmit the form that uses the minimum quoting possible. 
context.prev = context.now; // End of qpair context.now = context.stack.pop(); const escapeToken = '\\' + token; switch (context.now) { case internals.components.contextComment: break; case internals.components.contextQuotedString: parseData.local += escapeToken; atomData.locals[elementCount] += escapeToken; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash elementLength += 2; break; case internals.components.literal: parseData.domain += escapeToken; atomData.domains[elementCount] += escapeToken; // The maximum sizes specified by RFC 5321 are octet counts, so we must include the backslash elementLength += 2; break; // $lab:coverage:off$ default: throw new Error('quoted pair logic invoked in an invalid context: ' + context.now); // $lab:coverage:on$ } break; // Comment case internals.components.contextComment: // http://tools.ietf.org/html/rfc5322#section-3.2.2 // comment = "(" *([FWS] ccontent) [FWS] ")" // // ccontent = ctext / quoted-pair / comment switch (token) { // Nested comment case '(': // Nested comments are ok context.stack.push(context.now); context.now = internals.components.contextComment; break; // End of comment case ')': context.prev = context.now; context.now = context.stack.pop(); break; // Quoted pair case '\\': context.stack.push(context.now); context.now = internals.components.contextQuotedPair; break; // Folding white space case '\r': if (emailLength === ++i || email[i] !== '\n') { // Fatal error updateResult(internals.diagnoses.errCRNoLF); break; } // Fallthrough case ' ': case '\t': updateResult(internals.diagnoses.cfwsFWS); context.stack.push(context.now); context.now = internals.components.contextFWS; prevToken = token; break; // CTEXT default: // http://tools.ietf.org/html/rfc5322#section-3.2.3 // ctext = %d33-39 / ; Printable US-ASCII // %d42-91 / ; characters not including // %d93-126 / ; "(", ")", or "\" // obs-ctext // // obs-ctext = obs-NO-WS-CTL // // obs-NO-WS-CTL = %d1-8 / ; US-ASCII 
control // %d11 / ; characters that do not // %d12 / ; include the carriage // %d14-31 / ; return, line feed, and // %d127 ; white space characters charCode = token.codePointAt(0); if (charCode === 0 || charCode === 10 || (charCode !== 127 && internals.c1Controls(charCode))) { // Fatal error updateResult(internals.diagnoses.errExpectingCTEXT); break; } else if (internals.c0Controls(charCode) || charCode === 127) { updateResult(internals.diagnoses.deprecatedCTEXT); } } break; // Folding white space case internals.components.contextFWS: // http://tools.ietf.org/html/rfc5322#section-3.2.2 // FWS = ([*WSP CRLF] 1*WSP) / obs-FWS // ; Folding white space // But note the erratum: // http://www.rfc-editor.org/errata_search.php?rfc=5322&eid=1908: // In the obsolete syntax, any amount of folding white space MAY be // inserted where the obs-FWS rule is allowed. This creates the // possibility of having two consecutive "folds" in a line, and // therefore the possibility that a line which makes up a folded header // field could be composed entirely of white space. 
// // obs-FWS = 1*([CRLF] WSP) if (prevToken === '\r') { if (token === '\r') { // Fatal error updateResult(internals.diagnoses.errFWSCRLFx2); break; } if (++crlfCount > 1) { // Multiple folds => obsolete FWS updateResult(internals.diagnoses.deprecatedFWS); } else { crlfCount = 1; } } switch (token) { case '\r': if (emailLength === ++i || email[i] !== '\n') { // Fatal error updateResult(internals.diagnoses.errCRNoLF); } break; case ' ': case '\t': break; default: if (prevToken === '\r') { // Fatal error updateResult(internals.diagnoses.errFWSCRLFEnd); } crlfCount = 0; // End of FWS context.prev = context.now; context.now = context.stack.pop(); // Look at this token again in the parent context --i; } prevToken = token; break; // Unexpected context // $lab:coverage:off$ default: throw new Error('unknown context: ' + context.now); // $lab:coverage:on$ } // Primary state machine if (maxResult > internals.categories.rfc5322) { // Fatal error, no point continuing break; } } // Token loop // Check for errors if (maxResult < internals.categories.rfc5322) { const punycodeLength = Punycode.toASCII(parseData.domain).length; // Fatal errors if (context.now === internals.components.contextQuotedString) { updateResult(internals.diagnoses.errUnclosedQuotedString); } else if (context.now === internals.components.contextQuotedPair) { updateResult(internals.diagnoses.errBackslashEnd); } else if (context.now === internals.components.contextComment) { updateResult(internals.diagnoses.errUnclosedComment); } else if (context.now === internals.components.literal) { updateResult(internals.diagnoses.errUnclosedDomainLiteral); } else if (token === '\r') { updateResult(internals.diagnoses.errFWSCRLFEnd); } else if (parseData.domain.length === 0) { updateResult(internals.diagnoses.errNoDomain); } else if (elementLength === 0) { updateResult(internals.diagnoses.errDotEnd); } else if (hyphenFlag) { updateResult(internals.diagnoses.errDomainHyphenEnd); } // Other errors else if (punycodeLength > 
255) { // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.2 // The maximum total length of a domain name or number is 255 octets. updateResult(internals.diagnoses.rfc5322DomainTooLong); } else if (Buffer.byteLength(parseData.local, 'utf8') + punycodeLength + /* '@' */ 1 > 254) { // http://tools.ietf.org/html/rfc5321#section-4.1.2 // Forward-path = Path // // Path = "<" [ A-d-l ":" ] Mailbox ">" // // http://tools.ietf.org/html/rfc5321#section-4.5.3.1.3 // The maximum total length of a reverse-path or forward-path is 256 octets (including the punctuation and element separators). // // Thus, even without (obsolete) routing information, the Mailbox can only be 254 characters long. This is confirmed by this verified // erratum to RFC 3696: // // http://www.rfc-editor.org/errata_search.php?rfc=3696&eid=1690 // However, there is a restriction in RFC 2821 on the length of an address in MAIL and RCPT commands of 254 characters. Since // addresses that do not fit in those fields are not normally useful, the upper limit on address lengths should normally be considered // to be 254. 
updateResult(internals.diagnoses.rfc5322TooLong); } else if (elementLength > 63) { // http://tools.ietf.org/html/rfc1035#section-2.3.4 // labels 63 octets or less updateResult(internals.diagnoses.rfc5322LabelTooLong); } else if (options.minDomainAtoms && atomData.domains.length < options.minDomainAtoms && (atomData.domains.length !== 1 || atomData.domains[0][0] !== '[')) { updateResult(internals.diagnoses.errDomainTooShort); } else if (internals.hasDomainLiteralThenAtom(atomData.domains)) { updateResult(internals.diagnoses.errDotAfterDomainLiteral); } else if (options.tldWhitelist || options.tldBlacklist) { const tldAtom = atomData.domains[elementCount]; if (!internals.validDomain(tldAtom, options)) { updateResult(internals.diagnoses.errUnknownTLD); } } } // Check for errors // Finish if (maxResult < internals.categories.dnsWarn) { // Per RFC 5321, domain atoms are limited to letter-digit-hyphen, so we only need to check code <= 57 to check for a digit const code = atomData.domains[elementCount].codePointAt(0); if (code <= 57) { updateResult(internals.diagnoses.rfc5321TLDNumeric); } } if (maxResult < threshold) { maxResult = internals.diagnoses.valid; } const finishResult = diagnose ? maxResult : maxResult < internals.defaultThreshold; // $lab:coverage:off$ if (callback) { callback(finishResult); } // $lab:coverage:on$ return finishResult; }; exports.diagnoses = internals.validate.diagnoses = (function () { const diag = {}; const keys = Object.keys(internals.diagnoses); for (let i = 0; i < keys.length; ++i) { const key = keys[i]; diag[key] = internals.diagnoses[key]; } return diag; })(); exports.normalize = internals.normalize;