]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/testdata/regextst.txt
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / test / testdata / regextst.txt
index da7dc051143d5e782185f9147433ba15589d3ca1..53bd73a7ef3f37435c5df6fd134fc71d923393db 100644 (file)
@@ -1,7 +1,7 @@
-# Copyright (c) 2001-2003 International Business Machines
+# Copyright (c) 2001-2012 International Business Machines
 # Corporation and others. All Rights Reserved.
 #
-#  file:  
+#  file:
 #
 #   ICU regular expression test cases.
 #
 #               <pattern>      =  "<regular expression pattern>"
 #               <match string> =  "<tagged string>"
 #                                 the quotes on the pattern and match string can be " or ' or /
-#               <tagged string> = text, with the start and end of each 
+#               <tagged string> = text, with the start and end of each
 #                                 capture group tagged with <n>...</n>.  The overall match,
 #                                 if any, is group 0, as in <0>matched text</0>
-#               <flags>         = any combination of 
+#                                  A region can be specified with <r>...</r> tags.
+#                                 Standard ICU unescape will be applied, allowing \u, \U, etc. to appear.
+#
+#               <flags>         = any combination of
 #                                   i      case insensitive match
 #                                   x      free spacing and comments
 #                                   s      dot-matches-all mode
-#                                   m      multi-line mode.  $ and ^ match at embedded new-lines
+#                                   m      multi-line mode.  
+#                                            ($ and ^ match at embedded new-lines)
+#                                   D      Unix Lines mode (only recognize 0x0a as new-line)
+#                                   Q      UREGEX_LITERAL flag.  Entire pattern is literal string.
+#                                   v      If ICU is configured without break iteration, this
+#                                          regex test pattern should not compile.
+#                                   e      set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag
 #                                   d      dump the compiled pattern
 #                                   t      trace operation of match engine.
+#                                   2-9    a digit between 2 and 9, specifies the number of
+#                                          times to execute find().  The expected results are
+#                                          for the last find() in the sequence.
+#                                   G      Only check match / no match.  Do not check capture groups.
+#                                   E      Pattern compilation error expected
+#                                   L      Use LookingAt() rather than find()
+#                                   M      Use matches() rather than find().
+#
+#                                   a      Use non-Anchoring Bounds.
+#                                   b      Use Transparent Bounds.
+#                                          The a and b options only make a difference if
+#                                          a <r>region</r> has been specified in the string.
+#                                   z|Z    hitEnd was expected (z) or not expected (Z).
+#                                          With neither, hitEnd is not checked.
+#                                   y|Y    requireEnd was expected (y) or not expected (Y).
+#
 #                                 White space must be present between the flags and the match string.
 #
 
+# Look-ahead expressions
+#
+"(?!0{5})(\d{5})"              "<0><1>00001</1></0>zzzz"
+"(?!0{5})(\d{5})z"             "<0><1>00001</1>z</0>zzz"
+"(?!0{5})(\d{5})(?!y)"         "<0><1>00001</1></0>zzzz"
+"abc(?=def)"                   "<0>abc</0>def"
+"(.*)(?=c)"                    "<0><1>ab</1></0>cdef"
+
+"(?:.*)(?=c)"                  "<r>ab</r>cdef"
+"(?:.*)(?=c)"             b    "<r><0>ab</0></r>cdef"      # transparent bounds
+"(?:.*)(?=c)"             bM   "<r><0>ab</0></r>cdef"      # transparent bounds
+
+"(?:.*)(?=(c))"           b    "<0>ab</0><1>c</1>def"      # Capture in look-ahead
+"(?=(.)\1\1)\1"                "abcc<0><1>d</1></0>ddefg"  # Backrefs to look-ahead capture
+
+".(?!\p{L})"                   "abc<0>d</0> "              # Negated look-ahead
+".(?!(\p{L}))"                 "abc<0>d</0> "              # Negated look-ahead, no capture
+                                                           #   visible outside of look-ahead
+"and(?=roid)"            L     "<0>and</0>roid"
+"and(?=roid)"            M     "<r>and</r>roid"
+"and(?=roid)"            bM    "<r><0>and</0></r>roid"
+
+"and(?!roid)"            L     "<0>and</0>roix"
+"and(?!roid)"            L     "android"
+
+"and(?!roid)"            M     "<r><0>and</0></r>roid"     # Opaque bounds
+"and(?!roid)"            bM    "<r>and</r>roid"
+"and(?!roid)"            bM    "<r><0>and</0></r>roix"
+
+#
+# Negated Lookahead, various regions and region transparency
+#
+"abc(?!def)"                   "<0>abc</0>xyz"
+"abc(?!def)"                   "abcdef"
+"abc(?!def)"                   "<r><0>abc</0></r>def"
+"abc(?!def)"              b    "<r>abc</r>def"
+"abc(?!def)"              b    "<r><0>abc</0></r>xyz"
+
+#
+#  Anchoring Bounds
+#
+"^def$"                        "abc<r><0>def</0></r>ghi"           # anchoring (default) bounds
+"^def$"                  a     "abc<r>def</r>ghi"                  # non-anchoring bounds
+"^def"                   a     "<r><0>def</0></r>ghi"              # non-anchoring bounds
+"def$"                   a     "abc<r><0>def</0></r>"              # non-anchoring bounds
+
+"^.*$"                   m     "<0>line 1</0>\n line 2"
+"^.*$"                   m2    "line 1\n<0> line 2</0>"
+"^.*$"                   m3    "line 1\n line 2"
+"^.*$"                   m     "li<r><0>ne </0></r>1\n line 2"     # anchoring bounds
+"^.*$"                   m2    "li<r>ne </r>1\n line 2"            # anchoring bounds
+"^.*$"                  am     "li<r>ne </r>1\n line 2"            # non-anchoring bounds
+"^.*$"                  am     "li\n<r><0>ne </0></r>\n1\n line 2" # non-anchoring bounds
+
+#
+#  HitEnd and RequireEnd for new-lines just before end-of-input
+#
+"xyz$"                  yz     "<0>xyz</0>\n"
+"xyz$"                  yz     "<0>xyz</0>\x{d}\x{a}"
+
+"xyz$"                 myz     "<0>xyz</0>"                        # multi-line mode
+"xyz$"                 mYZ     "<0>xyz</0>\n" 
+"xyz$"                 mYZ     "<0>xyz</0>\r\n"
+"xyz$"                 mYZ     "<0>xyz</0>\x{85}abcd"
+
+"xyz$"                  Yz     "xyz\nx"
+"xyz$"                  Yz     "xyza"
+"xyz$"                  yz     "<0>xyz</0>"
+
+#
+#  HitEnd 
+#
+"abcd"                  Lz      "a"
+"abcd"                  Lz      "ab"
+"abcd"                  Lz      "abc"
+"abcd"                  LZ      "<0>abcd</0>"
+"abcd"                  LZ      "<0>abcd</0>e"
+"abcd"                  LZ      "abcx"
+"abcd"                  LZ      "abx"
+"abcd"                  Lzi     "a"
+"abcd"                  Lzi     "ab"
+"abcd"                  Lzi     "abc"
+"abcd"                  LZi     "<0>abcd</0>"
+"abcd"                  LZi     "<0>abcd</0>e"
+"abcd"                  LZi     "abcx"
+"abcd"                  LZi     "abx"
+
+#
+#  All Unicode line endings recognized.
+#     0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029
+#     Multi-line and non-multi-line modes take different paths, so the tests are repeated for each.
+#
+"^def$"                 mYZ    "abc\x{a}<0>def</0>\x{a}ghi"
+"^def$"                 mYZ    "abc\x{b}<0>def</0>\x{b}ghi"
+"^def$"                 mYZ    "abc\x{c}<0>def</0>\x{c}ghi"
+"^def$"                 mYZ    "abc\x{d}<0>def</0>\x{d}ghi"
+"^def$"                 mYZ    "abc\x{85}<0>def</0>\x{85}ghi"
+"^def$"                 mYZ    "abc\x{2028}<0>def</0>\x{2028}ghi"
+"^def$"                 mYZ    "abc\x{2029}<0>def</0>\x{2029}ghi"
+"^def$"                 mYZ    "abc\r\n<0>def</0>\r\nghi"
+
+"^def$"                 yz     "<0>def</0>\x{a}"
+"^def$"                 yz     "<0>def</0>\x{b}"
+"^def$"                 yz     "<0>def</0>\x{c}"
+"^def$"                 yz     "<0>def</0>\x{d}"
+"^def$"                 yz     "<0>def</0>\x{85}"
+"^def$"                 yz     "<0>def</0>\x{2028}"
+"^def$"                 yz     "<0>def</0>\x{2029}"
+"^def$"                 yz     "<0>def</0>\r\n"
+"^def$"                 yz     "<0>def</0>"
+
+
+"^def$"                       "<0>def</0>\x{2028"    #TODO: should be an error of some sort.
+
+#
+#  UNIX_LINES mode
+#
+"abc$"                 D      "<0>abc</0>\n"
+"abc$"                 D      "abc\r"
+"abc$"                 D      "abc\u0085"
+"a.b"                  D      "<0>a\rb</0>"
+"a.b"                  D      "a\nb"
+"(?d)abc$"                    "<0>abc</0>\n"
+"(?d)abc$"                    "abc\r"
+"abc$"                 mD     "<0>abc</0>\ndef"
+"abc$"                 mD     "abc\rdef"
+
+".*def"                L      "abc\r def xyz"          # Normal mode, LookingAt() stops at \r
+".*def"                DL     "<0>abc\r def</0> xyz"   # Unix Lines mode, \r not line end.
+".*def"                DL     "abc\n def xyz"   
+
+"(?d)a.b"                     "a\nb"
+"(?d)a.b"                     "<0>a\rb</0>"
+
+"^abc"                 m      "xyz\r<0>abc</0>"
+"^abc"                 Dm     "xyz\rabc"
+"^abc"                 Dm     "xyz\n<0>abc</0>"
+
+
 
 # Capturing parens
 ".(..)."                       "<0>a<1>bc</1>d</0>"
 "(hello)|(goodbye)"            "<0><2>goodbye</2></0>"
 "abc( +(  inner(X?) +)  xyz)"  "leading cruft <0>abc<1>     <2>  inner<3></3>    </2>  xyz</1></0> cruft"
 "\s*([ixsmdt]*)([:letter:]*)"  "<0>   <1>d</1><2></2></0>  "
+"(a|b)c*d"                     "a<0><1>b</1>cd</0>"
 
 # Non-capturing parens (?: stuff).   Groups, but does not capture.
 "(?:abc)*(tail)"               "<0>abcabcabc<1>tail</1></0>"
 ".*\Ahello"                    "stuff\nhello" # don't match after embedded new-line.
 
 # \b \B
+#
 ".*?\b(.).*"                   "<0>  $%^&*( <1>h</1>ello123%^&*()gxx</0>"
 "\ba\b"                        "-<0>a</0>"
 "\by\b"                        "xy"
+"[ \b]"                        "<0>b</0>"     # in a set, \b is a literal b.
 
 # Finds first chars of up to 5 words
 "(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?"   "<0><1>T</1>the <2>q</2>ick <3>b</3>rown <4>f</4></0>ox"
 
 "(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?.*"   "<0>   \u0301 \u0301<1>A</1>\u0302BC\u0303\u0304<2> </2>\u0305 \u0306<3>X</3>\u0307Y\u0308</0>"
 
+
+#
+#  Unicode word boundary mode
+#
+"(?w).*?\b"                      v   "<0></0>hello, world"
+"(?w).*?(\b.+?\b).*"             v   "<0><1> </1> 123.45   </0>"
+"(?w).*?(\b\d.*?\b).*"           v   "<0>  <1>123.45</1>   </0>"
+".*?(\b.+?\b).*"                     "<0>  <1>123</1>.45   </0>"
+"(?w:.*?(\b\d.*?\b).*)"          v   "<0>  <1>123.45</1>   </0>"
+"(?w:.*?(\b.+?\b).*)"            v   "<0><1>don't</1>   </0>"
+"(?w:.+?(\b\S.+?\b).*)"          v   "<0>  <1>don't</1>   </0>"
+"(?w:(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?).*)"     v "<0><1>.</1><2> </2><3>,</3><4>:</4><5>$</5><6>37,000.50</6><7> </7>   </0>"
+
+#
+#  Unicode word boundaries with Regions
+#
+"(?w).*?\b"                      v   "abc<r><0>def</0></r>ghi"
+"(?w).*?\b"                      v2  "abc<r>def<0></0></r>ghi"
+"(?w).*?\b"                      v3  "abc<r>def</r>ghi"
+#"(?w).*?\b"                      vb  "abc<r><0>def</0></r>ghi"    # TODO:  bug.  Ticket 6073
+#"(?w).*?\b"                      vb2 "abc<r>def</r>ghi"
+
+
+
 # . does not match new-lines
-"."                            "\u000a\u000d\u0085\u000c\u2028\u2029<0>X</0>\u000aY"
+"."                            "\u000a\u000d\u0085\u000c\u000b\u2028\u2029<0>X</0>\u000aY"
 "A."                           "A\u000a "# no match
 
 # \d for decimal digits
-"\d*"                          "<0>0123456789\u0660\u06F9\u0969\u0A66\u1369\u17E2\uFF10\U0001D7CE\U0001D7FF</0>non-digits"
+"\d*"                          "<0>0123456789\u0660\u06F9\u0969\u0A66\u17E2\uFF10\U0001D7CE\U0001D7FF</0>non-digits"
 "\D+"                          "<0>non digits</0>"
 "\D*(\d*)(\D*)"                "<0>non-digits<1>3456666</1><2>more non digits</2></0>"
 
 # \Q...\E quote mode
 "hel\Qlo, worl\Ed"             "<0>hello, world</0>"
 "\Q$*^^(*)?\A\E(a*)"           "<0>$*^^(*)?\\A<1>aaaaaaaaaaaaaaa</1></0>"
+"[abc\Q]\r\E]+"                "<0>aaaccc]]]\\\\\\</0>\r..."   # \Q ... \E escape in a [set]
+
+# UREGEX_LITERAL - entire pattern is a literal string, no escapes recognized.
+#                  Note that data strings in test cases still get escape processing.
+"abc\an\r\E\\abcd\u0031bye"     Q  "lead<0>abc\\an\\r\\E\\\\abcd\\u0031bye</0>extra"
+"case insensitive \\ (l)iteral" Qi "stuff!! <0>cAsE InSenSiTiVE \\\\ (L)ITeral</0>"
 
 # \S and \s  space characters
 "\s+"                          "not_space<0> \t \r \n \u3000 \u2004 \u2028 \u2029</0>xyz"
 ".*^(Hello)"                   " Hello Hello Hello Hello Goodbye"# No Match
 
 # $ matches only at end of line, or before a newline preceding the end of line
-".*?(Goodbye)$"                "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
-".*?(Goodbye)"                 "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
-".*?(Goodbye)$"                "Hello Goodbye> Goodbye Goodbye "# No Match
+".*?(Goodbye)$"           zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
+".*?(Goodbye)"            ZY   "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
+".*?(Goodbye)$"           z    "Hello Goodbye> Goodbye Goodbye "# No Match
 
-".*?(Goodbye)$"                "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
-".*?(Goodbye)$"                "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
-".*?(Goodbye)$"                "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\r\n"
-".*?(Goodbye)$"                "Hello Goodbye Goodbye Goodbye\n\n"# No Match
+".*?(Goodbye)$"           zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
+".*?(Goodbye)$"           zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
+".*?(Goodbye)$"           zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\r\n"
+".*?(Goodbye)$"           z    "Hello Goodbye Goodbye Goodbye\n\n"# No Match
 
 # \Z matches at end of input, like $ with default flags.
-".*?(Goodbye)\Z"               "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
-".*?(Goodbye)"                 "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
-".*?(Goodbye)\Z"               "Hello Goodbye> Goodbye Goodbye "# No Match
-"here$"                        "here\nthe end"# No Match
+".*?(Goodbye)\Z"          zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
+".*?(Goodbye)"            ZY   "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye"
+".*?(Goodbye)\Z"          z    "Hello Goodbye> Goodbye Goodbye "# No Match
+"here$"                   z    "here\nthe end"# No Match
 
 ".*?(Goodbye)\Z"               "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
 ".*?(Goodbye)\Z"               "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\n"
 # \z matches only at the end of string.
 #    no special treatment of new lines.
 #    no dependencies on flag settings.
-".*?(Goodbye)\z"               "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
-".*?(Goodbye)\z"               "Hello Goodbye Goodbye Goodbye "# No Match
-"here$"                        "here\nthe end"# No Match
+".*?(Goodbye)\z"          zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>"
+".*?(Goodbye)\z"          z    "Hello Goodbye Goodbye Goodbye "# No Match
+"here$"                   z    "here\nthe end"# No Match
 
-".*?(Goodbye)\z"               "Hello Goodbye Goodbye Goodbye\n"# No Match
-".*?(Goodbye)\n\z"             "<0>Hello Goodbye Goodbye <1>Goodbye</1>\n</0>"
+".*?(Goodbye)\z"          z    "Hello Goodbye Goodbye Goodbye\n"# No Match
+".*?(Goodbye)\n\z"        zy   "<0>Hello Goodbye Goodbye <1>Goodbye</1>\n</0>"
+"abc\z|def"               ZY   "abc<0>def</0>"
 
 # (?# comment) doesn't muck up pattern
 "Hello (?# this is a comment) world"  "  <0>Hello  world</0>..."
 "(x?)*xyz"                     "<0>xx<1></1>xyz</0>"    # Slightly weird, but correct.  The "last" time through (x?),
                                                         #   it matches the empty string.
 
+# Set expressions, basic operators and escapes work
+#
+"[\d]+"                        "<0>0123</0>abc/.,"
+"[^\d]+"                       "0123<0>abc/.,</0>"
+"[\D]+"                        "0123<0>abc/.,</0>"
+"[^\D]+"                       "<0>0123</0>abc/.,"
+
+"[\s]+"                        "<0> \t</0>abc/.,"
+"[^\s]+"                       " \t<0>abc/.,</0>"
+"[\S]+"                        " \t<0>abc/.,</0>"
+"[^\S]+"                       "<0> \t</0>abc/.,"
+
+"[\w]+"                        "<0>abc123</0> .,;"
+"[^\w]+"                       "abc123<0> .,;</0>"
+"[\W]+"                        "abc123<0> .,;</0>"
+"[^\W]+"                       "<0>abc123</0> .,;"
+
+"[\z]+"                        "abc<0>zzz</0>def"     # \z has no special meaning
+"[^\z]+"                       "<0>abc</0>zzzdef"
+"[\^]+"                        "abc<0>^^</0>"
+"[^\^]+"                       "<0>abc</0>^^"
+
+"[\u0041c]+"                   "<0>AcAc</0>def"
+"[\U00010002]+"                "<0>\ud800\udc02</0>\U00010003"
+"[^\U00010002]+"               "<0>Hello</0>\x{10002}"
+"[\x61b]+"                     "<0>abab</0>cde"
+#"[\x6z]+"                      "\x06"                  #TODO:  single hex digits should fail
+"[\x{9}\x{75}\x{6d6}\x{6ba6}\x{6146B}\x{10ffe3}]+"  "<0>\u0009\u0075\u06d6\u6ba6\U0006146B\U0010ffe3</0>abc"
+
+"[\N{LATIN CAPITAL LETTER TONE SIX}ab\N{VARIATION SELECTOR-70} ]+"       "x<0> \u0184\U000E0135 ab</0>c"
+"[\N{LATIN SMALL LETTER C}-\N{LATIN SMALL LETTER F}]+"    "ab<0>cdef</0>ghi"
+
+
+
+#
+#  [set expressions], check the precedence of '-', '&', '--', '&&'
+#      '-' and '&', for compatibility with ICU UnicodeSet, have the same
+#                   precedence as the implicit Union between adjacent items.
+#      '--' and '&&', for compatibility with Java, have lower precedence than
+#                   the implicit Union operations.  '--' and '&&' themselves
+#                   have the same precedence, and group left to right.
+#
+"[[a-m]-[f-w]p]+"              "<0>dep</0>fgwxyz"
+"[^[a-m]-[f-w]p]+"             "dep<0>fgwxyz</0>"
+
+"[[a-m]--[f-w]p]+"             "<0>de</0>pfgwxyz"
+"[^[a-m]--[f-w]p]+"            "de<0>pfgwxyz</0>"
+
+"[[a-m]&[e-s]w]+"              "<0>efmw</0>adnst"
+"[^[a-m]&[e-s]w]+"             "efmw<0>adnst</0>"
+
+"[[a-m]&[e-s]]+"              "<0>efm</0>adnst"
+
+
+
 # {min,max} iteration qualifier
 "A{3}BC"                       "<0>AAABC</0>"
 
 "ab(?:c|(d?))(\1)"             "<0>ab<1></1><2></2></0>e"
 "ab(?:c|(d?))(\1)"             "<0>ab<1></1><2></2></0>"
 
+# Back References that hit/don't hit end
+"(abcd) \1"                z   "abcd abc"
+"(abcd) \1"                Z   "<0><1>abcd</1> abcd</0>"
+"(abcd) \1"                Z   "<0><1>abcd</1> abcd</0> "
+
+# Case Insensitive back references that hit/don't hit end.
+"(abcd) \1"                zi  "abcd abc"
+"(abcd) \1"                Zi  "<0><1>abcd</1> ABCD</0>"
+"(abcd) \1"                Zi  "<0><1>abcd</1> ABCD</0> "
+
+# Back references that hit/don't hit boundary limits.
+
+"(abcd) \1"                z   "<r>abcd abc</r>d "
+"(abcd) \1"                Z   "<r><0><1>abcd</1> abcd</0></r> "
+"(abcd) \1"                Z   "<r><0><1>abcd</1> abcd</0> </r>"
+
+"(abcd) \1"                zi  "<r>abcd abc</r>d "
+"(abcd) \1"                Zi  "<r><0><1>abcd</1> abcd</0></r> "
+"(abcd) \1"                Zi  "<r><0><1>abcd</1> abcd</0> </r>"
+
+# Back reference that fails match near the end of input without actually hitting the end.
+"(abcd) \1"                ZL  "abcd abd"
+"(abcd) \1"                ZLi "abcd abd"
+
+# Back reference to a zero-length match.  They are always a successful match.
+"ab(x?)cd(\1)ef"               "<0>ab<1></1>cd<2></2>ef</0>"
+"ab(x?)cd(\1)ef"            i  "<0>ab<1></1>cd<2></2>ef</0>"
+
+# Back refs to capture groups that didn't participate in the match.
+"ab(?:(c)|(d))\1"              "abde"
+"ab(?:(c)|(d))\1"              "<0>ab<1>c</1>c</0>e"
+"ab(?:(c)|(d))\1"            i "abde"
+"ab(?:(c)|(d))\1"            i "<0>ab<1>c</1>c</0>e"
+
 # Case Insensitive
-"aBc"                    i      "<0>ABC</0>"      
-"a[^bc]d"                i      "ABD"   
+"aBc"                    i      "<0>ABC</0>"
+"a[^bc]d"                i      "ABD"
 '((((((((((a))))))))))\10' i    "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8></7></6></5></4></3></2></1>A</0>"
 
 "(?:(?i)a)b"                    "<0>Ab</0>"
 "a b"                           "ab"
 "abc "                          "abc"
 "abc "                          "<0>abc </0>"
-"ab[cd e]z"                     "<0>ab z</0>" 
+"ab[cd e]z"                     "<0>ab z</0>"
 "ab\ c"                         "<0>ab c</0> "
 "ab c"                          "<0>ab c</0> "
 "ab c"                        x "ab c "
 "ab\ c"                       x "<0>ab c</0> "
 
+#
+# Pattern Flags
+#
+"(?u)abc"                       "<0>abc</0>"
+"(?-u)abc"                      "<0>abc</0>"
+
+#
+#  \c escapes  (Control-whatever)
+#
+"\cA"                           "<0>\u0001</0>"
+"\ca"                           "<0>\u0001</0>"
+"\c\x"                          "<0>\u001cx</0>"
+
 
 #Multi-line mode
-'b\s^'                        m  "a\nb\n"
+'b\s^'                        m "a\nb\n"
+"(?m)^abc$"                     "abc \n abc\n<0>abc</0>\nabc"
+"(?m)^abc$"                   2 "abc \n abc\nabc\n<0>abc</0>"
+"^abc$"                       2 "abc \n abc\nabc\nabc"
+
+# Empty and full range
+"[\u0000-\U0010ffff]+"          "<0>abc\u0000\uffff\U00010000\U0010ffffzz</0>"
+"[^\u0000-\U0010ffff]"          "abc\u0000\uffff\U00010000\U0010ffffzz"
+"[^a--a]+"                      "<0>abc\u0000\uffff\U00010000\U0010ffffzz</0>"
 
 # Free-spacing mode
 "a b c  # this is a comment"  x "<0>abc</0> "
 "abc.*$"                         "<0>abcdef</0>"
 "abc(.*)"                        "<0>abc<1>def</1></0>"
 "abc(.*)"                        "<0>abc<1></1></0>"
-"abc.*"                          "<0>abc</0>\ndef"     
-"abc.*"                     s    "<0>abc\ndef</0>"     
+"abc.*"                          "<0>abc</0>\ndef"
+"abc.*"                     s    "<0>abc\ndef</0>"
 "abc.*$"                    s    "<0>abc\ndef</0>"
 "abc.*$"                         "abc\ndef"
 "abc.*$"                    m    "<0>abc</0>\ndef"
 "ab\x09w"                        "<0>ab\u0009w</0>"
 "ab\xabcdc"                      "<0>ab\u00abcdc</0>"
 "ab\x{abcd}c"                    "<0>ab\uabcdc</0>"
-"ab\x{101234}c"                    "<0>ab\U00101234c</0>"
+"ab\x{101234}c"                  "<0>ab\U00101234c</0>"
 "abα"                            "<0>abα</0>"
 
+#
+#  Octal Escaping.   This conforms to Java conventions, not Perl.
+"\0101\00\03\073\0154\01442"      "<0>A\u0000\u0003\u003b\u006c\u0064\u0032</0>"
+"\0776"                          "<0>\u003f\u0036</0>"  # overflow, the 6 is literal.
+"\0376xyz"                       "<0>\u00fexyz</0>"
+"\08"                        E   "<0>\u00008</0>"
+"\0"                         E   "x"
 
 #
 #  \u Surrogate Pairs
 "\ud800\udc00*"                   "<0>\U00010000\U00010000\U00010000</0>\U00010001"
 "\ud800\ud800\udc00"              "<0>\ud800\U00010000</0>\U00010000\U00010000\U00010001"
 "(\ud800)(\udc00)"                "\U00010000"
+"\U00010001+"                     "<0>\U00010001\U00010001</0>\udc01"
+
+#
+# hitEnd with find()
+#
+"abc"                        Z    "aa<0>abc</0>  abcab"
+"abc"                       2Z    "aaabc  <0>abc</0>ab"
+"abc"                       3z    "aa>abc  abcab"
+
+#
+# \ escaping
+#
+"abc\jkl"                         "<0>abcjkl</0>"    # escape of a non-special letter is just itself.
+"abc[ \j]kl"                      "<0>abcjkl</0>"
+
+#
+# Bug xxxx
+#
+"(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?"   MG  "<0>-1234-21-31T41:51:61.789+71:81</0>"
+
+
+#
+# A random, complex, meaningless pattern that should at least compile
+#
+"(?![^\<C\f\0146\0270\}&&[|\02-\x3E\}|X-\|]]{7,}+)[|\\\x98\<\?\u4FCFr\,\0025\}\004|\0025-\0521]|(?<![|\01-\u829E])|(?<!\p{Alpha})|^|(?-s:[^\x15\\\x24F\a\,\a\u97D8[\x38\a[\0224-\0306[^\0020-\u6A57]]]]??)(?xix:[^|\{\[\0367\t\e\x8C\{\[\074c\]V[|b\fu\r\0175\<\07f\066s[^D-\x5D]]])(?xx:^{5,}+)(?d)(?=^\D)|(?!\G)(?>\G)(?![^|\]\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\037\uECB3\u3D9A\x31\|\<b\0206\uF2EC\01m\,\ak\a\03&&\p{Punct}]]]])(?-dxs:[|\06-\07|\e-\x63&&[|Tp\u18A3\00\|\xE4\05\061\015\0116C|\r\{\}\006\xEA\0367\xC4\01\0042\0267\xBB\01T\}\0100\?[|\[-\u459B|\x23\x91\rF\0376[|\?-\x94\0113-\\\s]]]]{6}?)(?<=[^\t-\x42H\04\f\03\0172\?i\u97B6\e\f\uDAC2])(?=\B)(?>[^\016\r\{\,\uA29D\034\02[\02-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\0114\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()"  "<0></0>abc"
+
 
+#
+# Bug 3225
+
+"1|9"                             "<0>1</0>"
+"1|9"                             "<0>9</0>"
+"1*|9"                            "<0>1</0>"
+"1*|9"                            "<0></0>9"
+
+"(?:a|ac)d"                       "<0>acd</0>"
+"a|ac"                            "<0>a</0>c"
 
+#
+# Bug 3320
+#
+"(a([^ ]+)){0,} (c)"              "<0><1>a<2>b</2></1> <3>c</3></0> "
+"(a([^ ]+))* (c)"                 "<0><1>a<2>b</2></1> <3>c</3></0> "
 
 #
+# Bug 3436
+#
+"(.*?) *$"                        "<0><1>test</1>    </0>"
+
+#
+# Bug 4034
+#
+"\D"                              "<0>A</0>BC\u00ffDEF"
+"\d"                              "ABC\u00ffDEF"
+"\D"                              "<0>\u00ff</0>DEF"
+"\d"                              "\u00ffDEF"
+"\D"                              "123<0>\u00ff</0>DEF"
+"\D"                              "<0>\u0100</0>DEF"
+"\D"                              "123<0>\u0100</0>DEF"
+
+#
+#bug 4024, new line sequence handling
+#
+"(?m)^"                           "<0></0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+"(?m)^"                       2   "AA\u000d\u000a<0></0>BB\u000d\u000aCC\u000d\u000a"
+"(?m)^"                       3   "AA\u000d\u000aBB\u000d\u000a<0></0>CC\u000d\u000a"
+"(?m)^"                       4   "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+
+"(?m)$"                           "AA<0></0>\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+"(?m)$"                       2   "AA\u000d\u000aBB<0></0>\u000d\u000aCC\u000d\u000a"
+"(?m)$"                       3   "AA\u000d\u000aBB\u000d\u000aCC<0></0>\u000d\u000a"
+"(?m)$"                       4   "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0></0>"
+"(?m)$"                       5   "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+
+"$"                               "AA\u000d\u000aBB\u000d\u000aCC<0></0>\u000d\u000a"
+"$"                           2   "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0></0>"
+"$"                           3   "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+
+"$"                               "\u000a\u0000a<0></0>\u000a"
+"$"                           2   "\u000a\u0000a\u000a<0></0>"
+"$"                           3   "\u000a\u0000a\u000a"
+
+"$"                               "<0></0>"
+"$"                           2   ""
+
+"$"                               "<0></0>\u000a"
+"$"                           2   "\u000a<0></0>"
+"$"                           3   "\u000a"
+
+"^"                               "<0></0>"
+"^"                           2   ""
+
+"\Z"                              "<0></0>"
+"\Z"                          2   ""
+"\Z"                          2   "\u000a<0></0>"
+"\Z"                              "<0></0>\u000d\u000a"
+"\Z"                          2   "\u000d\u000a<0></0>"
+
+
+# ^ does not match at interior new-lines when not in multi-line mode.
+"^"                               "<0></0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+"^"                           2   "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+
+#
+# Dot-matches-any mode, and stopping at new-lines if off.
+#
+"."                               "<0>1</0>23\u000aXYZ"
+"."                           2   "1<0>2</0>3\u000aXYZ"
+"."                           3   "12<0>3</0>\u000aXYZ"
+"."                           4   "123\u000a<0>X</0>YZ"    # . doesn't match newlines
+"."                           4   "123\u000b<0>X</0>YZ"
+"."                           4   "123\u000c<0>X</0>YZ"
+"."                           4   "123\u000d<0>X</0>YZ"
+"."                           4   "123\u000d\u000a<0>X</0>YZ"
+"."                           4   "123\u0085<0>X</0>YZ"
+"."                           4   "123\u2028<0>X</0>YZ"
+"."                           4   "123\u2029<0>X</0>YZ"
+"."                           4s  "123<0>\u000a</0>XYZ"    # . matches any
+"."                           4s  "123<0>\u000b</0>XYZ"
+"."                           4s  "123<0>\u000c</0>XYZ"
+"."                           4s  "123<0>\u000d</0>XYZ"
+"."                           4s  "123<0>\u000d\u000a</0>XYZ"
+"."                           4s  "123<0>\u0085</0>XYZ"
+"."                           4s  "123<0>\u2028</0>XYZ"
+"."                           4s  "123<0>\u2029</0>XYZ"
+".{6}"                            "123\u000a\u000dXYZ"
+".{6}"                         s  "<0>123\u000a\u000dX</0>Y"
+
+
+#
+# Ranges
+#
+".*"                              "abc<r><0>def</0></r>ghi"
+"a"                               "aaa<r><0>a</0>aa</r>aaa"
+"a"                           2   "aaa<r>a<0>a</0>a</r>aaa"
+"a"                           3   "aaa<r>aa<0>a</0></r>aaa"
+"a"                           4   "aaa<r>aaa</r>aaa"
+"a"                               "aaa<r><0>a</0>aa</r>aaa"
+
+#
+# [set] parsing, systematically run through all of the parser states.
+#
+#
+"[def]+"                          "abc<0>ddeeff</0>ghi"       # set-open
+"[^def]+"                         "<0>abc</0>defghi"
+"[:digit:]+"                      "abc<0>123</0>def"
+"[:^digit:]+"                     "<0>abc</0>123def"
+"[\u005edef]+"                    "abc<0>de^f</0>ghi"
+
+"[]]+"                            "abc<0>]]]</0>[def"         # set-open2
+"[^]]+"                           "<0>abc</0>]]][def"
+
+"[:Lu:]+"                         "abc<0>ABC</0>def"          # set-posix
+"[:Lu]+"                          "abc<0>uL::Lu</0>"
+"[:^Lu]+"                         "abc<0>uL:^:Lu</0>"
+"[:]+"                            "abc<0>:::</0>def"
+"[:whats this:]"               E  " "
+"[--]+"                       dE  "-------"
+
+"[[nested]]+"                      "xyz[<0>nnetsteed</0>]abc"   #set-start
+"[\x{41}]+"                        "CB<0>AA</0>ZYX"
+"[\[\]\\]+"                        "&*<0>[]\\</0>..."
+"[*({<]+"                          "^&<0>{{(<<*</0>)))"
+
+
+"[-def]+"                          "abc<0>def-ef-d</0>xyz"     # set-start-dash
+"[abc[--def]]"                 E   " "
+
+"[x[&def]]+"                        "abc<0>def&</0>ghi"        # set-start-amp
+"[&& is bad at start]"         E   " "
+
+"[abc"                         E   " "                         # set-after-lit
+"[def]]"                           "abcdef"
+"[def]]"                           "abcde<0>f]</0>]"
+
+"[[def][ghi]]+"                    "abc]<0>defghi</0>[xyz"     # set-after-set
+"[[def]ghi]+"                      "abc]<0>defghi</0>[xyz" 
+"[[[[[[[[[[[abc]"              E   " "
+"[[abc]\p{Lu}]+"                   "def<0>abcABC</0>xyz"
+
+"[d-f]+"                           "abc<0>def</0>ghi"          # set-after-range
+"[d-f[x-z]]+"                      "abc<0>defxyzzz</0>gw"
+"[\s\d]+"                          "abc<0>  123</0>def"
+"[d-f\d]+"                         "abc<0>def123</0>ghi"
+"[d-fr-t]+"                        "abc<0>defrst</0>uvw"
+
+"[abc--]"                      E   " "                         # set-after-op
+"[[def]&&]"                    E   " "
+"[-abcd---]+"                     "<0>abc</0>--"                 #[-abcd]--[-]
+"[&abcd&&&ac]+"                   "b<0>ac&&ca</0>d"              #[&abcd]&&[&ac]
+
+"[[abcd]&[ac]]+"                  "b<0>acac</0>d"              # set-set-amp
+"[[abcd]&&[ac]]+"                 "b<0>acac</0>d"
+"[[abcd]&&ac]+"                   "b<0>acac</0>d"
+"[[abcd]&ac]+"                    "<0>bacacd&&&</0>"
+
+"[abcd&[ac]]+"                    "<0>bacacd&&&</0>"           #set-lit-amp
+"[abcd&&[ac]]+"                   "b<0>acac</0>d"
+"[abcd&&ac]+"                     "b<0>acac</0>d"
+
+"[[abcd]-[ac]]+"                  "a<0>bdbd</0>c"              # set-set-dash
+"[[abcd]--[ac]]+"                 "a<0>bdbd</0>c"
+"[[abcd]--ac]+"                   "a<0>bdbd</0>c"
+"[[abcd]-ac]+"                    "<0>bacacd---</0>"
+
+"[a-d--[b-c]]+"                   "b<0>adad</0>c"              # set-range-dash
+"[a-d--b-c]+"                     "b<0>adad</0>c"   
+"[a-d-[b-c]]+"                    "<0>bad-adc</0>"
+"[a-d-b-c]+"                      "<0>bad-adc</0>"
+"[\w--[b-c]]+"                    "b<0>adad</0>c"  
+"[\w--b-c]+"                      "b<0>adad</0>c"   
+"[\w-[b-c]]+"                     "<0>bad-adc</0>"
+"[\w-b-c]+"                       "<0>bad-adc</0>"
+
+"[a-d&&[b-c]]+"                   "a<0>bcbc</0>d"              # set-range-amp
+"[a-d&&b-c]+"                     "a<0>bcbc</0>d"
+"[a-d&[b-c]]+"                    "<0>abc&bcd</0>"
+"[a-d&b-c]+"                      "<0>abc&bcd</0>"
+
+"[abcd--bc]+"                     "b<0>adda</0>c"              # set-lit-dash
+"[abcd--[bc]]+"                   "b<0>adda</0>c"
+"[abcd-[bc]]+"                    "<0>bad--dac</0>xyz"
+"[abcd-]+"                        "<0>bad--dac</0>xyz"
+
+"[abcd-\s]+"                 E    "xyz<0>abcd  --</0>xyz"      # set-lit-dash-esc
+"[abcd-\N{LATIN SMALL LETTER G}]+"  "xyz-<0>abcdefg</0>hij-"
+"[bcd-\{]+"                       "a<0>bcdefyz{</0>|}"
+
+"[\p{Ll}]+"                       "ABC<0>abc</0>^&*&"          # set-escape
+"[\P{Ll}]+"                       "abc<0>ABC^&*&</0>xyz"
+"[\N{LATIN SMALL LETTER Q}]+"     "mnop<0>qqq</0>rst"
+"[\sa]+"                          "cb<0>a  a  </0>(*&"
+"[\S]+"                           "   <0>hello</0>  "
+"[\w]+"                           "   <0>hello_world</0>!  "
+"[\W]+"                           "a<0>   *$%#,</0>hello "
+"[\d]+"                           "abc<0>123</0>def"
+"[\D]+"                           "123<0>abc</0>567"
+"[\$\#]+"                         "123<0>$#$#</0>\\"
+
+#
+#  Try each of the Java compatibility properties.
+#    These are checked here, while normal Unicode properties aren't, because
+#    these Java compatibility properties are implemented directly by regexp, while other
+#    properties are handled by ICU's Property and UnicodeSet APIs.
+#
+#    These tests are only to verify that the names are recognized and the
+#    implementation isn't dead.  They are not intended to verify that the
+#    function definitions are 100% correct.
+#
+"[:InBasic Latin:]+"               "ΓΔΕΖΗΘ<0>hello, world.</0>ニヌネノハバパ"
+"[:^InBasic Latin:]+"              "<0>ΓΔΕΖΗΘ</0>hello, world.ニヌネノハバパ"
+"\p{InBasicLatin}+"                "ΓΔΕΖΗΘ<0>hello, world.</0>ニヌネノハバパ"
+"\P{InBasicLatin}+"                "<0>ΓΔΕΖΗΘ</0>hello, world.ニヌネノハバパ"
+"\p{InGreek}+"                     "<0>ΓΔΕΖΗΘ</0>hello, world.ニヌネノハバパ"
+"\p{InCombining Marks for Symbols}" "<0>\u20d0</0>"
+"\p{Incombiningmarksforsymbols}"    "<0>\u20d0</0>"
+
+
+"\p{javaDefined}+"                 "\uffff<0>abcd</0>\U00045678"
+"\p{javaDigit}+"                   "abc<0>1234</0>xyz"
+"\p{javaIdentifierIgnorable}+"     "abc<0>\u0000\u000e\u009f</0>xyz"
+"\p{javaISOControl}+"              "abc<0>\u0000\u000d\u0083</0>xyz"
+"\p{javaJavaIdentifierPart}+"      "#@!<0>abc123_$</0>;"
+"\p{javaJavaIdentifierStart}+"     "123\u0301<0>abc$_</0>%^&"
+"\p{javaLetter}+"                  "123<0>abcDEF</0>&*()("
+"\p{javaLetterOrDigit}+"           "$%^&*<0>123abcகஙசஜஞ</0>☺♘♚☔☎♬⚄⚡"
+"\p{javaLowerCase}+"               "ABC<0>def</0>&^%#:="
+"\p{javaMirrored}+"                "ab$%<0>(){}[]</0>xyz"
+"\p{javaSpaceChar}+"               "abc<0> \u00ao\u2028</0>!@#"
+"\p{javaSupplementaryCodePoint}+"  "abc\uffff<0>\U00010000\U0010ffff</0>\u0000"
+"\p{javaTitleCase}+"               "abCE<0>Džῌᾨ</0>123"
+"\p{javaUnicodeIdentifierStart}+"  "123<0>abcⅣ</0>%^&&*"
+"\p{javaUnicodeIdentifierPart}+"   "%&&^<0>abc123\u0301\u0002</0>..."
+"\p{javaUpperCase}+"               "abc<0>ABC</0>123"
+"\p{javaValidCodePoint}+"          "<0>\u0000abc\ud800 unpaired \udfff |\U0010ffff</0>"
+"\p{javaWhitespace}+"              "abc\u00a0\u2007\u202f<0> \u0009\u001c\u001f\u2028</0>42"
+"\p{all}+"                         "<0>123\u0000\U0010ffff</0>"
+"\P{all}+"                         "123\u0000\U0010ffff"
+
+# [:word:] is implemented directly by regexp.  It is not a Java compatibility property, but PCRE and others support it.
+
+"[:word:]+"                        ".??$<0>abc123ΓΔΕΖΗ_</0>%%%"
+"\P{WORD}+"                        "<0>.??$</0>abc123ΓΔΕΖΗ_%%%"
+
+#
+#  Errors on unrecognized ASCII letter escape sequences.
+#
+"[abc\Y]+"                         "<0>abcY</0>"
+"[abc\Y]+"                     eE  "<0>abcY</0>"
+
+"(?:a|b|c|\Y)+"                    "<0>abcY</0>"
+"(?:a|b|c|\Y)+"                eE  "<0>abcY</0>"
+
+"\Q\Y\E"                       e   "<0>\\Y</0>"
+
+#
+# Reported problem
+#
+"[a-\w]"                       E  "x"
+
+#
+# Bug 4045
+#
+"A*"                              "<0>AAAA</0>"
+"A*"                           2  "AAAA<0></0>"
+"A*"                           3  "AAAA"
+"A*"                           4  "AAAA"
+"A*"                           5  "AAAA"
+"A*"                           6  "AAAA"
+"A*"                              "<0></0>"
+"A*"                           2  ""
+"A*"                           3  ""
+"A*"                           4  ""
+"A*"                           5  ""
+
+#
+# Bug 4046
+#
+"(?m)^"                           "<0></0>AA\u000dBB\u000dCC\u000d"
+"(?m)^"                        2  "AA\u000d<0></0>BB\u000dCC\u000d"
+"(?m)^"                        3  "AA\u000dBB\u000d<0></0>CC\u000d"
+"(?m)^"                        4  "AA\u000dBB\u000dCC\u000d"
+"(?m)^"                        5  "AA\u000dBB\u000dCC\u000d"
+"(?m)^"                        6  "AA\u000dBB\u000dCC\u000d"
+
+"(?m)^"                           "<0></0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+"(?m)^"                        2  "AA\u000d\u000a<0></0>BB\u000d\u000aCC\u000d\u000a"
+"(?m)^"                        3  "AA\u000d\u000aBB\u000d\u000a<0></0>CC\u000d\u000a"
+"(?m)^"                        4  "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a"
+
+#
+# Bug 4059
+#
+"\w+"                            "<0>イチロー</0>"
+"\b....\b."                       "<0>イチロー?</0>"
+
+
+#
+# Bug 4058    ICU Unicode Set patterns have an odd feature -
+#             A $ as the last character before the close bracket means match
+#             a \uffff, which means off the end of the string in transliterators.
+#             Didn't make sense for regular expressions, and is now fixed.
+#
+"[\$](P|C|D);"                    "<0>$<1>P</1>;</0>"
+"[$](P|C|D);"                     "<0>$<1>P</1>;</0>"
+"[$$](P|C|D);"                    "<0>$<1>P</1>;</0>"
+
+#
+# bug 4888    Flag settings lost in some cases.
+#
+"((a){2})|(#)"              is    "no"
+"((a){2})|(#)"              is    "<0><1>a<2>a</2></1></0>#"
+"((a){2})|(#)"              is    "a<0><3>#</3></0>"
+
+"((a|b){2})|c"              is    "<0>c</0>"
+"((a|b){2})|c"              is    "<0>C</0>"
+"((a|b){2})|c"              s     "C"
+
+#
+# bug 5617  ZWJ \u200d shouldn't cause word boundaries
+#
+".+?\b"                           "<0> </0>\u0935\u0915\u094D\u200D\u0924\u0947 "
+".+?\b"                       2   " <0>\u0935\u0915\u094D\u200D\u0924\u0947</0> "
+".+?\b"                       3   " \u0935\u0915\u094D\u200D\u0924\u0947 "
+
+#
+# bug 5386  "^.*$" should match empty input
+#
+"^.*$"                            "<0></0>"
+"^.*$"                     m      "<0></0>"
+"^.*$"                            "<0></0>\n"
+"(?s)^.*$"                        "<0>\n</0>"
+
+#
+# bug 5386  Empty pattern and empty input should match.
+#
+""                                "<0></0>abc"
+""                                "<0></0>"
+
+#
+# bug 5386   Range upper and lower bounds can be equal
+#
+"[a-a]"                           "<0>a</0>"
+
+#
+# bug 5386  $* should not fail, should match empty string.
+#
+"$*"                              "<0></0>abc"
+
+#
+# bug 5386  \Q ... \E escaping problem
+#
+"[a-z\Q-$\E]+"                    "QE<0>abc-def$</0>."
+
+# More reported 5386 Java compatibility failures
+#
+"[^]*abb]*"                       "<0>kkkk</0>"
+"\xa"                             "huh"              # Java would like to be warned.
+"^.*$"                            "<0></0>"
+
+#
+# bug 5386  Empty left alternation should produce a zero length match.
+#
+"|a"                              "<0></0>a"
+"$|ab"                            "<0>ab</0>"
+"$|ba"                            "ab<0></0>"
+
+#
+# bug 5386  Java compatibility for set expressions
+#
+"[a-z&&[cde]]+"                   "ab<0>cde</0>fg"
+
+#
+# bug 6019  matches() needs to backtrack and check for a longer match if the
+#                     first match(es) found don't match the entire input.
+#
+"a?|b"                            "<0></0>b"
+"a?|b"                         M  "<0>b</0>"
+"a?|.*?u|stuff|d"              M  "<0>stuff</0>"
+"a?|.*?(u)|stuff|d"            M  "<0>stuff<1>u</1></0>"
+"a+?"                             "<0>a</0>aaaaaaaaaaaa"
+"a+?"                          M  "<0>aaaaaaaaaaaaa</0>"
+
+#
+#   Bug 7724.  Expression to validate zip codes.
+#
+"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?"    "<0><1>94040</1><2>-3344</2></0>"
+"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?"    "94040-0000"
+"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?"    "00000-3344"
+
+#
+#    Bug 8666.  Assertion failure on match, bad operand to JMP_SAV_X opcode.
+#
+"((.??)+|A)*"                     "<0><1><2></2></1></0>AAAAABBBBBCCCCCDDDDEEEEE"
+
+#
+#    Bug 8826.  Incorrect results with case insensitive matches.
+#
+"AS(X)"                         i "aßx"
+"AS.*"                          i "aßx"           # Expansion of sharp s can't split between pattern terms.
+"ASßS"                          i "<0>aßß</0>"    # All one literal string, does match.
+"ASß{1}S"                       i "aßß"           # Pattern with terms, no match.
+"aßx"                           i "<0>assx</0>"
+"aßx"                           i "<0>ASSX</0>"
+"aßx"                           i "<0>aßx</0>"
+"ASS(.)"                        i "<0>aß<1>x</1></0>"
+
+# Case Insensitive, probe some corner cases.
+"ass+"                          i "aß"            # Second 's' in pattern is qualified, can't combine with first.
+"as+"                           i "aß"
+"aßs"                           i "as"            # Can't match half of a ß
+"aß+"                           i "<0>assssssss</0>s"
+"aß+"                           i "<0>assßSssSSS</0>s"
+"a(ß?)+"                        i "<0>assssssss<1></1></0>s"
+"a(ß?)+"                        i "<0>a<1></1></0>zzzzzzzzs"
+
+"\U00010400"                    i "<0>\U00010428</0>"   # case folded supplemental code point.
+
+"sstuff"                        i "<0>ßtuff</0>"    # exercise optimizations on what chars can start a match.
+"sstuff"                        i "s<0>ßtuff</0>"    # exercise optimizations on what chars can start a match.
+"ßtuff"                         i "s<0>sstuff</0>"
+"ßtuff"                         i "s<0>Sstuff</0>"
+
+"a(..)\1"                       i "<0>A<1>bc</1>BC</0>def"
+"(ß)\1"                         i "aa<0><1>ss</1>ß</0>zz"          # Case insensitive back reference
+"..(.)\1"                       i "<0>aa<1>ß</1>ss</0>"
+"ab(..)\1"                      i "xx<0>ab<1>ss</1>ß</0>ss" 
+
+" (ss) ((\1.*)|(.*))"           i "<0> <1>ss</1> <2><4>sß</4></2></0>"       # The back reference 'ss' must not match in 'sß'
+
+# Bug 9057
+#   \u200c and \u200d should be word characters.
+#
+"\w+"                             "  <0>abc\u200cdef\u200dghi</0>   "
+"\w+"                           i "  <0>abc\u200cdef\u200dghi</0>   "
+"[\w]+"                           "  <0>abc\u200cdef\u200dghi</0>   "
+"[\w]+"                         i "  <0>abc\u200cdef\u200dghi</0>   "
+
+# Bug 9283
+#  uregex_open fails for look-behind assertion + case-insensitive
+
+"(ab)?(?<=ab)cd|ef"             i  "<0><1>ab</1>cd</0>"
+
 #  Random debugging, Temporary
 #
-#"^(?:a?b?)*$"                   "a--" 
-"^(?:a?b?)*$"                    "a--"
+#"^(?:a?b?)*$"                   "a--"
 
 "This is a string with (?:one |two |three )endings"   "<0>This is a string with two endings</0>"
 "((?:a|b|c)whoop-dee-do) | [jkl]|zed"             "x"
 "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$"   G "<0>ftp://ftp.blah.co.uk:2828/blah%20blah.gif</0>"
 "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$"   G "<0>https://blah.gov/blah-blah.as</0>"
 "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$"     "www.blah.com"
-"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$"     "http://www.blah.com/I have spaces!" 
+"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$"     "http://www.blah.com/I have spaces!"
 "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$"     "ftp://blah_underscore/[nope]"
 "^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$"   G "<0>12/01/2002</0>"
 "^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$"   G "<0>12/01/2002 12:32:10</0>"
 "^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$"     "10.0.5.4"
 "^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$"     "192.168.0.1"
 "^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$"     "my ip address"
-#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"   G "<0>foo@foo.com</0>"   # TODO:  \w in pattern
-#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"   G "<0>foo@foo-foo.com.au</0>"   # TODO:  \w in pattern
-#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"   G "<0>foo@foo.foo.info</0>"   # TODO:  \w in pattern
-#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"     "foo@.com"   # TODO:  \w in pattern
-#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"     "foo@foo..com"   # TODO:  \w in pattern
-#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"     "foo@me@.com"   # TODO:  \w in pattern
-#"/\*[\d\D]*?\*/"   G "<0>/* my comment */</0>"
-#"/\*[\d\D]*?\*/"   G "<0>/* my multiline comment */</0>"
-#"/\*[\d\D]*?\*/"   G "<0>/* my nested comment */</0>"
-#"/\*[\d\D]*?\*/"     "*/ anything here /*"
-#"/\*[\d\D]*?\*/"     "anything between 2 seperate comments"
-#"/\*[\d\D]*?\*/"     "\* *\"
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"   G "<0>foo@foo.com</0>"
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"   G "<0>foo@foo-foo.com.au</0>"
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"   G "<0>foo@foo.foo.info</0>"
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"     "foo@.com"
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"     "foo@foo..com"
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$"     "foo@me@.com"
+"/\*[\d\D]*?\*/"   G "<0>/* my comment */</0>"
+"/\*[\d\D]*?\*/"   G "<0>/* my multiline comment */</0>"
+"/\*[\d\D]*?\*/"   G "<0>/* my nested comment */</0>"
+"/\*[\d\D]*?\*/"     "*/ anything here /*"
+"/\*[\d\D]*?\*/"     "anything between 2 seperate comments"
+"/\*[\d\D]*?\*/"     "\* *\"
 "/\*[\p{N}\P{N}]*?\*/"   G "<0>/* my comment */</0>"
 "/\*[\p{N}\P{N}]*?\*/"   G "<0>/* my multiline comment */</0>"
 "/\*[\p{N}\P{N}]*?\*/"   G "<0>/* my nested comment */</0>"
 '^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$'   G "<0>blah@[10.0.0.1]</0>"
 '^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$'   G "<0>a@b.c</0>"
 '^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$'     "non@match@."
-#"^\d{9}[\d|X]$"   G "<0>1234123412</0>"
-#"^\d{9}[\d|X]$"   G "<0>123412341X</0>"
-#"^\d{9}[\d|X]$"     "not an isbn"
+"^\d{9}[\d|X]$"   G "<0>1234123412</0>"
+"^\d{9}[\d|X]$"   G "<0>123412341X</0>"
+"^\d{9}[\d|X]$"     "not an isbn"
 "^\d{9}(\d|X)$"   G "<0>1234123412</0>"
 "^\d{9}(\d|X)$"   G "<0>123412341X</0>"
 "^\d{9}(\d|X)$"     "not an isbn"
 "\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}"     "12 123 1234"
 "\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}"     "(012) 123/1234"
 "\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}"     "(012) 123 12345"
-#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"   G "<0>bob-smith@foo.com</0>"   # TODO:  \w in pattern
-#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"   G "<0>bob.smith@foo.com</0>"   # TODO:  \w in pattern
-#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"   G "<0>bob_smith@foo.com</0>"   # TODO:  \w in pattern
-#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"     "-smith@foo.com"   # TODO:  \w in pattern 
-#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"     ".smith@foo.com"   # TODO:  \w in pattern
-#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"     "smith@foo_com"   # TODO:  \w in pattern
+"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"   G "<0>bob-smith@foo.com</0>"
+"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"   G "<0>bob.smith@foo.com</0>"
+"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"   G "<0>bob_smith@foo.com</0>"
+"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"     "-smith@foo.com"
+"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"     ".smith@foo.com"
+"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$"     "smith@foo_com"
 "^(?=.*\d).{4,8}$"   G "<0>1234</0>"
 "^(?=.*\d).{4,8}$"   G "<0>asdf1234</0>"
 "^(?=.*\d).{4,8}$"   G "<0>asp123</0>"
 "^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$"     "$12,3456.01"
 "^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$"     "12345"
 "^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$"     "$1.234"
-"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})"   G "<0>C:\\temp\\this allows spaces\\web.config</0>" 
+"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})"   G "<0>C:\\temp\\this allows spaces\\web.config</0>"
 "([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})"   G "<0>\\\\Andromeda\\share\\file name.123</0>"
 "([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})"     "tz:\temp\ fi*le?na:m<e>.doc"
 "([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})"     "\\Andromeda\share\filename.a"
 "^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$"     "qqqBFDB4D31-3E35-4DAB-AFCA-5E6E5C8F61EA"
 "^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$"     "BFDB4D31-3E-4DAB-AFCA-5E6E5C8F61EA"
 "^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$"     "BFDB4D31-3E35-4DAB-AF"
-#"^\d{2}(\x2e)(\d{3})(-\d{3})?$"   G "<0>12.345-678</0>"  # TODO: \x not implemented.
-#"^\d{2}(\x2e)(\d{3})(-\d{3})?$"   G "<0>23.345-123</0>"
-#"^\d{2}(\x2e)(\d{3})(-\d{3})?$"   G "<0>99.999</0>"
-#"^\d{2}(\x2e)(\d{3})(-\d{3})?$"     "41222-222"
-#"^\d{2}(\x2e)(\d{3})(-\d{3})?$"     "3.444-233"
-#"^\d{2}(\x2e)(\d{3})(-\d{3})?$"     "43.324444"
+"^\d{2}(\x2e)(\d{3})(-\d{3})?$"   G "<0>12.345-678</0>"
+"^\d{2}(\x2e)(\d{3})(-\d{3})?$"   G "<0>23.345-123</0>"
+"^\d{2}(\x2e)(\d{3})(-\d{3})?$"   G "<0>99.999</0>"
+"^\d{2}(\x2e)(\d{3})(-\d{3})?$"     "41222-222"
+"^\d{2}(\x2e)(\d{3})(-\d{3})?$"     "3.444-233"
+"^\d{2}(\x2e)(\d{3})(-\d{3})?$"     "43.324444"
 "^\d{2}(\u002e)(\d{3})(-\d{3})?$"   G "<0>12.345-678</0>"
 "^\d{2}(\u002e)(\d{3})(-\d{3})?$"   G "<0>23.345-123</0>"
 "^\d{2}(\u002e)(\d{3})(-\d{3})?$"   G "<0>99.999</0>"
 "^\d{2}(\u002e)(\d{3})(-\d{3})?$"     "41222-222"
 "^\d{2}(\u002e)(\d{3})(-\d{3})?$"     "3.444-233"
 "^\d{2}(\u002e)(\d{3})(-\d{3})?$"     "43.324444"
-#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"   G "<0>c:\file.txt</0>"
-#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"   G "<0>c:\folder\sub folder\file.txt</0>"
-#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"   G "<0>\\network\folder\file.txt</0>"    # TODO:  \w in pattern
-#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"     "C:"   # TODO:  \w in pattern
-#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"     "C:\file.xls"   # TODO:  \w in pattern
-#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"     "folder.txt"   # TODO:  \w in pattern
+#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"   G "<0>c:\file.txt</0>"   # TODO:  debug
+#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"   G "<0>c:\folder\sub folder\file.txt</0>"   # TODO:  debug
+#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"   G "<0>\\network\folder\file.txt</0>"    # TODO:  debug
+"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"     "C:"
+"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"     "C:\file.xls"
+"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$"     "folder.txt"
 "^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$"   G "<0>my.domain.com</0>"
 "^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$"   G "<0>regexlib.com</0>"
 "^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$"   G "<0>big-reg.com</0>"
 "^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$"     "1-555-5555"
 "^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$"     "15553333"
 "^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$"     "0-561-555-1212"
-#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>"   G "<0><input type = text name = "bob"></0>"    # TODO:  \w in pattern
-#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>"   G "<0><select name = "fred"></0>"   # TODO:  \w in pattern
-#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>"   G "<0><form</0>"   # TODO:  \w in pattern
-#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>"     "<input type = submit>"   # TODO:  \w in pattern
-#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>"     "<font face = "arial">"   # TODO:  \w in pattern
-#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>"     "The drity brown fox stank like"   # TODO:  \w in pattern
+'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>'   G '<0><input type = text name = "bob"></0>'
+'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>'   G '<0><select name = "fred"></0>'
+#'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>'   G '<0><form></0>'    #TODO:  Debug
+'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>'     "<input type = submit>"   # TODO:  \w in pattern
+'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>'     '<font face = "arial">'   # TODO:  \w in pattern
+'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>'      "The drity brown fox stank like"
 "^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$"   G "<0>1:00 AM</0>"
 "^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$"   G "<0>12:00 PM</0>"
 "^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$"   G "<0>1:00am</0>"
 "^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$"     "10.57.98.23."
 "<img([^>]*[^/])>"   G '<0><img src="bob"></0>'
 "<img([^>]*[^/])>"     '<img src="bob" />'
-#"<!--[\s\S]*?-->"   G "<0><!-- comments --></0>"
-#"<!--[\s\S]*?-->"   G "<0><!-- x = a > b - 3 --></0>"
-#"<!--[\s\S]*?-->"     "<COMMENTS>this is a comment</COMMENTS>"
+"<!--[\s\S]*?-->"   G "<0><!-- comments --></0>"
+"<!--[\s\S]*?-->"   G "<0><!-- x = a > b - 3 --></0>"
+"<!--[\s\S]*?-->"     "<COMMENTS>this is a comment</COMMENTS>"
 "<!--[\p{Zs}\P{Zs}]*?-->"   G "<0><!-- comments --></0>"
 "<!--[\p{Zs}\P{Zs}]*?-->"   G "<0><!-- x = a > b - 3 --></0>"
 "<!--[\p{Zs}\P{Zs}]*?-->"     "<COMMENTS>this is a comment</COMMENTS>"
 "(\{\\f\d*)\\([^;]+;)"   G "<0>{\\f1\\fswiss\\fcharset0\\fprq2{\\*\\panose 020b0604020202020204}Arial;</0>"
 "(\{\\f\d*)\\([^;]+;)"   G "{\\f"
 "(\{\\f\d*)\\([^;]+;)"     "{f0fs20 some text}"
-#"</?([a-zA-Z][-A-Za-z\d\.]{0,71})(\s+(\S+)(\s*=\s*([-\w\.]{1,1024}|"[^"]{0,1024}"|'[^']{0,1024}'))?)*\s*>"   G "<0><IMG src='stars.gif' alt="space" height=1></0>"    # TODO:  \w in pattern
-#"</?([a-zA-Z][-A-Za-z\d\.]{0,71})(\s+(\S+)(\s*=\s*([-\w\.]{1,1024}|"[^"]{0,1024}"|'[^']{0,1024}'))?)*\s*>"     "this is not a tag"   # TODO:  \w in pattern
+#"</?([a-zA-Z][-A-Za-z\d\.]{0,71})(\s+(\S+)(\s*=\s*([-\w\.]{1,1024}|"[^"]{0,1024}"|'[^']{0,1024}'))?)*\s*>"   G '<0><IMG src='stars.gif' alt="space" height=1></0>'    # TODO:  Can't quote this pattern with the test syntax!
+#"</?([a-zA-Z][-A-Za-z\d\.]{0,71})(\s+(\S+)(\s*=\s*([-\w\.]{1,1024}|"[^"]{0,1024}"|'[^']{0,1024}'))?)*\s*>"     "this is not a tag"
 "^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$"   G "<0>12/30/2002</0>"
 "^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$"   G "<0>01/12/1998 13:30</0>"
 "^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$"   G "<0>01/28/2002 22:35:00</0>"
 "^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$"     "bad.bad.gif"
 "^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$"     "slash\gif."
 "<[^>\s]*\bauthor\b[^>]*>"   G '<0><author name="Daniel"></0>'
-#"<[^>\s]*\bauthor\b[^>]*>"   G "<0></sch:author></0>"
-#"<[^>\s]*\bauthor\b[^>]*>"   G '<0><pp:author name="Daniel"</0>'
+"<[^>\s]*\bauthor\b[^>]*>"   G "<0></sch:author></0>"
+# "<[^>\s]*\bauthor\b[^>]*>"   G '<0><pp:author name="Daniel"</0>'  #Debug  should work
 "<[^> ]*\bauthor\b[^>]*>"   G "<0></sch:author></0>"
-"<[^> ]*\bauthor\b[^>]*>"   G '<0><pp:author name="Daniel"></0>' 
+"<[^> ]*\bauthor\b[^>]*>"   G '<0><pp:author name="Daniel"></0>'
 "<[^>\s]*\bauthor\b[^>]*>"     "<other>"
 "<[^>\s]*\bauthor\b[^>]*>"     "</authors>"
 "<[^>\s]*\bauthor\b[^>]*>"     "<work>author</work>"
 "(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)"     "0"
 "(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)"     "0.0"
 "(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)"     ".0"
-#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"   G "<0>Sacramento</0>"          #TODO: Octal
-#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"   G "<0>San Francisco</0>"
-#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"   G "<0>San Luis Obispo</0>"
-#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "SanFrancisco"
-#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "SanLuisObispo"
-#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "San francisco"
-#"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$"   G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}</0>"
-#"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$"   G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0</0>"
-#"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$"     "0xe02ff0e400ad090Ac0300d00a0008ba0"
+"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"   G "<0>Sacramento</0>"
+"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "<0><2>San Francisco</2></0>"
+"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "<0><3>San Luis Obispo</3></0>"
+"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "SanFrancisco"
+"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "SanLuisObispo"
+"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$"     "San francisco"
+"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$"   G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}</0>"
+"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$"   G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0</0>"
+"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$"     "0xe02ff0e400ad090Ac0300d00a0008ba0"
 "^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$"   G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}</0>"
 "^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$"   G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0</0>"
 "^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$"     "0xe02ff0e400ad090Ac0300d00a0008ba0"
 "^((0[1-9])|(1[0-2]))\/(\d{2})$"   G "<0>01/04</0>"
 "^((0[1-9])|(1[0-2]))\/(\d{2})$"     "13/03"
 "^((0[1-9])|(1[0-2]))\/(\d{2})$"     "10/2003"
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0><script language=javascript>document.write("one");</script></0>"    # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"     "--"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"     "A-Z][a-z]+"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0>strFirstName</0>"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0>intAgeInYears</0>"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0>Where the Wild Things Are</0>"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"     "123"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"     "abc"   # TODO:  \w in pattern
-#"<script[^>]*>[\w|\t|\r|\W]*</script>"     "this has no caps in it"   # TODO:  \w in pattern
+"<script[^>]*>[\w|\t|\r|\W]*</script>"   G '<0><script language=javascript>document.write("one");</script></0>'
+"<script[^>]*>[\w|\t|\r|\W]*</script>"     "--"
+"<script[^>]*>[\w|\t|\r|\W]*</script>"     "A-Z][a-z]+"
+#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0>strFirstName</0>"   # Test Case damaged?
+#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0>intAgeInYears</0>"   # Test Case damaged?
+#"<script[^>]*>[\w|\t|\r|\W]*</script>"   G "<0>Where the Wild Things Are</0>"   #  Test Case damaged?
+"<script[^>]*>[\w|\t|\r|\W]*</script>"     "123"
+"<script[^>]*>[\w|\t|\r|\W]*</script>"     "abc"
+"<script[^>]*>[\w|\t|\r|\W]*</script>"     "this has no caps in it"
 "(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)"   G "<0>-0.050</0>"
 "(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)"   G "<0>-5.000</0>"
 "(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)"   G "<0>-5</0>"
 "^.{4,8}$"     "asd"
 "^.{4,8}$"     "123"
 "^.{4,8}$"     "asdfe12345"
-#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"   G "<0>a@a.com</0>"    # TODO:  \w in pattern
-#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"   G "<0>a@a.com.au</   # TODO:  \w in pattern0>"
-#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"   G "<0>a@a.au</0>"   # TODO:  \w in pattern
-#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"     "word"   # TODO:  \w in pattern
-#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"     "word@"   # TODO:  \w in pattern
-#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"     "@word"   # TODO:  \w in pattern
+"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"   G "<0>a@a.com</0>"
+"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"   G "<0>a@a.com.au</0>"
+"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"   G "<0>a@a.au</0>"
+"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"     "word"
+"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"     "word@"
+"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$"     "@word"
 "^\d{5}-\d{4}$"   G "<0>22222-3333</0>"
 "^\d{5}-\d{4}$"   G "<0>34545-2367</0>"
 "^\d{5}-\d{4}$"   G "<0>56334-2343</0>"
 "^[12345]$"     "6"
 "^[12345]$"     "-1"
 "^[12345]$"     "abc"
-#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"   G "<0>joe@aol.com</0>"    # TODO:  \w in pattern
-#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"   G "<0>joe@wrox.co.uk</0>"   # TODO:  \w in pattern
-#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"   G "<0>joe@domain.info</0>"   # TODO:  \w in pattern
-#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"     "a@b"   # TODO:  \w in pattern
-#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"     "notanemail"   # TODO:  \w in pattern
-#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"     "joe@@."   # TODO:  \w in pattern
+"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"   G "<0>joe@aol.com</0>"
+"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"   G "<0>joe@wrox.co.uk</0>"
+"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"   G "<0>joe@domain.info</0>"
+"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"     "a@b"
+"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"     "notanemail"
+"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$"     "joe@@."
 "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$"   G "<0>joe@aol.com</0>"
 "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$"   G "<0>ssmith@aspalliance.com</0>"
 "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$"   G "<0>a@b.cc</0>"
 "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$"     "joe@123aspx.com"
 "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$"     "joe@web.info"
 "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$"     "joe@company.co.uk"
-#"[\w-]+@([\w-]+\.)+[\w-]+"   G "<0>joe@aol.com</0>"   # TODO:  \w in pattern
-#"[\w-]+@([\w-]+\.)+[\w-]+"   G "<0>a@b.c</0>"   # TODO:  \w in pattern
-#"[\w-]+@([\w-]+\.)+[\w-]+"     "asdf"   # TODO:  \w in pattern
-#"[\w-]+@([\w-]+\.)+[\w-]+"     "1234"   # TODO:  \w in pattern
+"[\w-]+@([\w-]+\.)+[\w-]+"   G "<0>joe@aol.com</0>"
+"[\w-]+@([\w-]+\.)+[\w-]+"   G "<0>a@b.c</0>"
+"[\w-]+@([\w-]+\.)+[\w-]+"     "asdf"
+"[\w-]+@([\w-]+\.)+[\w-]+"     "1234"
 "\d{4}-?\d{4}-?\d{4}-?\d{4}"   G "<0>1234-1234-1234-1234</0>"
 "\d{4}-?\d{4}-?\d{4}-?\d{4}"   G "<0>1234123412341234</0>"
 "\d{4}-?\d{4}-?\d{4}-?\d{4}"     "1234123412345"