Home All Groups Group Topic Archive Search About

updated regex for email validation

Author
13 Sep 2006 2:57 PM
Jim Dornbush
Has anyone seen an updated regex expression from Microsoft for the email
validation expression so that single quotes are allowed?

I've been using the canned regex for emails, but recently been informed by a
customer that the single quote is allowed as part of the email address (Mr.
O'Leary).

I prefer using the un-modified version from the framework, but will update
my local code regardless.

Author
5 Oct 2006 8:03 PM
Greg Bacon
In article <u#9v5T01GHA.1***@TK2MSFTNGP06.phx.gbl>,
    Jim Dornbush <jdornb***@xrite.com> wrote:

: Has anyone seen an updated regex expression from Microsoft for the
: email validation expression so that single quotes are allowed?
:
: I've been using the canned regex for emails, but recently been
: informed by a customer that the single quote is allowed as part of the
: email address (Mr. O'Leary).
:
: I prefer using the un-modified version from the framework, but will
: update my local code regardless.

In <11mv70vtf7i6***@corp.supernews.com>, I described a stopgap that
you might consider in a pinch.

The idea is to dump the regular expression from the Email::Address CPAN
module. Code below.

Moral: regular expressions are poor substitutes for parsers.

Enjoy,
Greg

using System;
using System.Text.RegularExpressions;

using NUnit.Framework;

namespace Application
{
  public interface SyntaxChecker
  {
    bool WellFormed(string input);
  }

  public class EmailAddressChecker : SyntaxChecker
  {
    // derivative of work with the following copyright and license:
    // Copyright (c) 2004 Casey West.  All rights reserved.
    // This module is free software; you can redistribute it and/or
    // modify it under the same terms as Perl itself.

    // see http://search.cpan.org/~cwest/Email-Address-1.80/

    #region dumped regular expression
    private static string gibberish = @"
    (?-xism:(?:(?-xism:(?-xism:(?-xism:(?-xism:(?-xism:(?-xism:\
    s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^
    \x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))
    |(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+
    |\s+)*[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+(?-xism:(?-xism:\
    s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^
    \x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))
    |(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+
    |\s+)*)|(?-xism:(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(
    ?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?
    :\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x
    0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*<DQ>(?-xism:(?-xism:[
    ^\\<DQ>])|(?-xism:\\(?-xism:[^\x0A\x0D])))+<DQ>(?-xism:(?-xi
    sm:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xis
    m:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\
    ]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\
    s*)+|\s+)*))+)?(?-xism:(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?
    -xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:
    \s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[
    ^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*<(?-xism:(?-xi
    sm:(?-xism:(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^(
    )\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(
    ?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))
    |)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()<
    >\[\]:;@\,.<DQ>\s]+(?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]
    +)*)(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))
    |(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:
    (?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s
    *\)\s*))+)*\s*\)\s*)+|\s+)*)|(?-xism:(?-xism:(?-xism:\s*\((?
    :\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x
    0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xi
    sm:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*
    <DQ>(?-xism:(?-xism:[^\\<DQ>])|(?-xism:\\(?-xism:[^\x0A\x0D]
    )))+<DQ>(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\
    ]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-x
    ism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+
    )*\s*\)\s*))+)*\s*\)\s*)+|\s+)*))\@(?-xism:(?-xism:(?-xism:(
    ?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?
    -xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^
    ()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s
    *\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+(
    ?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+)*)(?-xism:(?-xism:
    \s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[
    ^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+)
    )|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)
    +|\s+)*)|(?-xism:(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:
    (?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((
    ?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\
    x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*\[(?:\s*(?-xism:(?-x
    ism:[^\[\]\\])|(?-xism:\\(?-xism:[^\x0A\x0D])))+)*\s*\](?-xi
    sm:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:
    \\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(
    ?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+
    )*\s*\)\s*)+|\s+)*)))>(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-
    xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\
    s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^
    \x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*))|(?-xism:(?-x
    ism:(?-xism:(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^
    ()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*
    (?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D])
    )|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()
    <>\[\]:;@\,.<DQ>\s]+(?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s
    ]+)*)(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+)
    )|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism
    :(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\
    s*\)\s*))+)*\s*\)\s*)+|\s+)*)|(?-xism:(?-xism:(?-xism:\s*\((
    ?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\
    x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-x
    ism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)
    *<DQ>(?-xism:(?-xism:[^\\<DQ>])|(?-xism:\\(?-xism:[^\x0A\x0D
    ])))+<DQ>(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\
    \]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-
    xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)
    +)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*))\@(?-xism:(?-xism:(?-xism:
    (?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(
    ?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[
    ^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\
    s*\)\s*)+|\s+)*(?-xism:[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+
    (?:\.[^\x00-\x1F\x7F()<>\[\]:;@\,.<DQ>\s]+)*)(?-xism:(?-xism
    :\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:
    [^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+
    ))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*
    )+|\s+)*)|(?-xism:(?-xism:(?-xism:\s*\((?:\s*(?-xism:(?-xism
    :(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\(
    (?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A
    \x0D]))|)+)*\s*\)\s*))+)*\s*\)\s*)+|\s+)*\[(?:\s*(?-xism:(?-
    xism:[^\[\]\\])|(?-xism:\\(?-xism:[^\x0A\x0D])))+)*\s*\](?-x
    ism:(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism
    :\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:\s*(?-xism:(?-xism:
    (?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|)+)*\s*\)\s*))
    +)*\s*\)\s*)+|\s+)*))))(?-xism:\s*\((?:\s*(?-xism:(?-xism:(?
    >[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0D]))|(?-xism:\s*\((?:
    \s*(?-xism:(?-xism:(?>[^()\\]+))|(?-xism:\\(?-xism:[^\x0A\x0
    D]))|)+)*\s*\)\s*))+)*\s*\)\s*)*)"
      .Replace("<DQ>", "\"")
      .Replace("\t", "")
      .Replace(" ", "")
      .Replace("\r", "")
      .Replace("\n", "");
    #endregion

    private static Regex mailbox =
      new Regex(gibberish, RegexOptions.ExplicitCapture);

    public bool WellFormed(string address)
    {
      return mailbox.IsMatch(address);
    }
  }

  [TestFixture]
  public class Test
  {
    private SyntaxChecker checker = null;

    [SetUp]
    public void SetUp()
    {
      checker = new EmailAddressChecker();
    }

    [Test]
    public void SimpleAddress()
    {
      Assert.IsTrue(checker.WellFormed("***@example.com"));
    }

    [Test]
    public void MustHaveLocalAndDomain()
    {
      Assert.IsFalse(checker.WellFormed("justlocal"));
    }

    [Test]
    public void AmpersandInLocalPart()
    {
      Assert.IsTrue(checker.WellFormed("\"foo & ba***@example.com"));
    }

    [Test]
    public void PhraseAndAngles()
    {
      Assert.IsTrue(checker.WellFormed("Joe <j**@example.com>"));
    }

    [Test]
    public void AddressAndComment()
    {
      Assert.IsTrue(checker.WellFormed("***@example.com (Joe)"));
    }

    [Test]
    public void ContainsSingleQuoteInParenComment()
    {
      Assert.IsTrue(checker.WellFormed("ole***@example.com (Mr. O'Leary)"));
    }

    [Test]
    public void ContainsSingleQuoteInBareComment()
    {
      Assert.IsTrue(checker.WellFormed("Mr. O'Leary <ole***@example.com"));
    }

    [Test]
    public void ContainsSingleQuoteInLocalPart()
    {
      Assert.IsTrue(checker.WellFormed("o'le***@example.com"));
    }
  }
}
--
You have to choose between trusting the natural stability of gold and the
honesty and intelligence of members of the government. With due respect for
these gentlemen, I advise you, as long as the capitalist system lasts, to
vote for gold.              -- George Bernard Shaw

AddThis Social Bookmark Button