X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..51004dcb01e06fef634b61be77ed73dd61cb6db9:/icuSources/test/testdata/regextst.txt diff --git a/icuSources/test/testdata/regextst.txt b/icuSources/test/testdata/regextst.txt index da7dc051..53bd73a7 100644 --- a/icuSources/test/testdata/regextst.txt +++ b/icuSources/test/testdata/regextst.txt @@ -1,7 +1,7 @@ -# Copyright (c) 2001-2003 International Business Machines +# Copyright (c) 2001-2012 International Business Machines # Corporation and others. All Rights Reserved. # -# file: +# file: # # ICU regular expression test cases. # @@ -10,19 +10,183 @@ # = "" # = "" # the quotes on the pattern and match string can be " or ' or / -# = text, with the start and end of each +# = text, with the start and end of each # capture group tagged with .... The overall match, # if any, is group 0, as in <0>matched text -# = any combination of +# A region can be specified with ... tags. +# Standard ICU unescape will be applied, allowing \u, \U, etc. to appear. +# +# = any combination of # i case insensitive match # x free spacing and comments # s dot-matches-all mode -# m multi-line mode. $ and ^ match at embedded new-lines +# m multi-line mode. +# ($ and ^ match at embedded new-lines) +# D Unix Lines mode (only recognize 0x0a as new-line) +# Q UREGEX_LITERAL flag. Entire pattern is literal string. +# v If icu configured without break iteration, this +# regex test pattern should not compile. +# e set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag # d dump the compiled pattern # t trace operation of match engine. +# 2-9 a digit between 2 and 9, specifies the number of +# times to execute find(). The expected results are +# for the last find() in the sequence. +# G Only check match / no match. Do not check capture groups. +# E Pattern compilation error expected +# L Use LookingAt() rather than find() +# M Use matches() rather than find(). +# +# a Use non-Anchoring Bounds. +# b Use Transparent Bounds. +# The a and b options only make a difference if +# a region has been specified in the string. +# z|Z hitEnd was expected(z) or not expected (Z). +# With neither, hitEnd is not checked. +# y|Y Require End expected(y) or not expected (Y). +# # White space must be present between the flags and the match string. # +# Look-ahead expressions +# +"(?!0{5})(\d{5})" "<0><1>00001zzzz" +"(?!0{5})(\d{5})z" "<0><1>00001zzzz" +"(?!0{5})(\d{5})(?!y)" "<0><1>00001zzzz" +"abc(?=def)" "<0>abcdef" +"(.*)(?=c)" "<0><1>abcdef" + +"(?:.*)(?=c)" "abcdef" +"(?:.*)(?=c)" b "<0>abcdef" # transparent bounds +"(?:.*)(?=c)" bM "<0>abcdef" # transparent bounds + +"(?:.*)(?=(c))" b "<0>ab<1>cdef" # Capture in look-ahead +"(?=(.)\1\1)\1" "abcc<0><1>dddefg" # Backrefs to look-ahead capture + +".(?!\p{L})" "abc<0>d " # Negated look-ahead +".(?!(\p{L}))" "abc<0>d " # Negated look-ahead, no capture + # visible outside of look-ahead +"and(?=roid)" L "<0>android" +"and(?=roid)" M "android" +"and(?=roid)" bM "<0>android" + +"and(?!roid)" L "<0>androix" +"and(?!roid)" L "android" + +"and(?!roid)" M "<0>android" # Opaque bounds +"and(?!roid)" bM "android" +"and(?!roid)" bM "<0>androix" + +# +# Negated Lookahead, various regions and region transparency +# +"abc(?!def)" "<0>abcxyz" +"abc(?!def)" "abcdef" +"abc(?!def)" "<0>abcdef" +"abc(?!def)" b "abcdef" +"abc(?!def)" b "<0>abcxyz" + +# +# Anchoring Bounds +# +"^def$" "abc<0>defghi" # anchoring (default) bounds +"^def$" a "abcdefghi" # non-anchoring bounds +"^def" a "<0>defghi" # non-anchoring bounds +"def$" a "abc<0>def" # non-anchoring bounds + +"^.*$" m "<0>line 1\n line 2" +"^.*$" m2 "line 1\n<0> line 2" +"^.*$" m3 "line 1\n line 2" +"^.*$" m "li<0>ne 1\n line 2" # anchoring bounds +"^.*$" m2 "line 1\n line 2" # anchoring bounds +"^.*$" am "line 1\n line 2" # non-anchoring bounds +"^.*$" am "li\n<0>ne \n1\n line 2" # non-anchoring bounds + +# +# HitEnd and RequireEnd for new-lines just before end-of-input +# +"xyz$" yz "<0>xyz\n" +"xyz$" yz "<0>xyz\x{d}\x{a}" + +"xyz$" myz "<0>xyz" # multi-line mode +"xyz$" mYZ "<0>xyz\n" +"xyz$" mYZ "<0>xyz\r\n" +"xyz$" mYZ "<0>xyz\x{85}abcd" + +"xyz$" Yz "xyz\nx" +"xyz$" Yz "xyza" +"xyz$" yz "<0>xyz" + +# +# HitEnd +# +"abcd" Lz "a" +"abcd" Lz "ab" +"abcd" Lz "abc" +"abcd" LZ "<0>abcd" +"abcd" LZ "<0>abcde" +"abcd" LZ "abcx" +"abcd" LZ "abx" +"abcd" Lzi "a" +"abcd" Lzi "ab" +"abcd" Lzi "abc" +"abcd" LZi "<0>abcd" +"abcd" LZi "<0>abcde" +"abcd" LZi "abcx" +"abcd" LZi "abx" + +# +# All Unicode line endings recognized. +# 0a, 0b, 0c, 0d, 0x85, 0x2028, 0x2029 +# Multi-line and non-multiline mode take different paths, so repeated tests. +# +"^def$" mYZ "abc\x{a}<0>def\x{a}ghi" +"^def$" mYZ "abc\x{b}<0>def\x{b}ghi" +"^def$" mYZ "abc\x{c}<0>def\x{c}ghi" +"^def$" mYZ "abc\x{d}<0>def\x{d}ghi" +"^def$" mYZ "abc\x{85}<0>def\x{85}ghi" +"^def$" mYZ "abc\x{2028}<0>def\x{2028}ghi" +"^def$" mYZ "abc\x{2029}<0>def\x{2029}ghi" +"^def$" mYZ "abc\r\n<0>def\r\nghi" + +"^def$" yz "<0>def\x{a}" +"^def$" yz "<0>def\x{b}" +"^def$" yz "<0>def\x{c}" +"^def$" yz "<0>def\x{d}" +"^def$" yz "<0>def\x{85}" +"^def$" yz "<0>def\x{2028}" +"^def$" yz "<0>def\x{2029}" +"^def$" yz "<0>def\r\n" +"^def$" yz "<0>def" + + +"^def$" "<0>def\x{2028" #TODO: should be an error of some sort. + +# +# UNIX_LINES mode +# +"abc$" D "<0>abc\n" +"abc$" D "abc\r" +"abc$" D "abc\u0085" +"a.b" D "<0>a\rb" +"a.b" D "a\nb" +"(?d)abc$" "<0>abc\n" +"(?d)abc$" "abc\r" +"abc$" mD "<0>abc\ndef" +"abc$" mD "abc\rdef" + +".*def" L "abc\r def xyz" # Normal mode, LookingAt() stops at \r +".*def" DL "<0>abc\r def xyz" # Unix Lines mode, \r not line end. +".*def" DL "abc\n def xyz" + +"(?d)a.b" "a\nb" +"(?d)a.b" "<0>a\rb" + +"^abc" m "xyz\r<0>abc" +"^abc" Dm "xyz\rabc" +"^abc" Dm "xyz\n<0>abc" + + # Capturing parens ".(..)." "<0>a<1>bcd" @@ -31,6 +195,7 @@ "(hello)|(goodbye)" "<0><2>goodbye" "abc( +( inner(X?) +) xyz)" "leading cruft <0>abc<1> <2> inner<3> xyz cruft" "\s*([ixsmdt]*)([:letter:]*)" "<0> <1>d<2> " +"(a|b)c*d" "a<0><1>bcd" # Non-capturing parens (?: stuff). Groups, but does not capture. "(?:abc)*(tail)" "<0>abcabcabc<1>tail" @@ -66,9 +231,11 @@ ".*\Ahello" "stuff\nhello" # don't match after embedded new-line. # \b \B +# ".*?\b(.).*" "<0> $%^&*( <1>hello123%^&*()gxx" "\ba\b" "-<0>a" "\by\b" "xy" +"[ \b]" "<0>b" # in a set, \b is a literal b. # Finds first chars of up to 5 words "(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?" "<0><1>Tthe <2>qick <3>brown <4>fox" @@ -78,18 +245,48 @@ "(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?.*" "<0> \u0301 \u0301<1>A\u0302BC\u0303\u0304<2> \u0305 \u0306<3>X\u0307Y\u0308" + +# +# Unicode word boundary mode +# +"(?w).*?\b" v "<0>hello, world" +"(?w).*?(\b.+?\b).*" v "<0><1> 123.45 " +"(?w).*?(\b\d.*?\b).*" v "<0> <1>123.45 " +".*?(\b.+?\b).*" "<0> <1>123.45 " +"(?w:.*?(\b\d.*?\b).*)" v "<0> <1>123.45 " +"(?w:.*?(\b.+?\b).*)" v "<0><1>don't " +"(?w:.+?(\b\S.+?\b).*)" v "<0> <1>don't " +"(?w:(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?).*)" v "<0><1>.<2> <3>,<4>:<5>$<6>37,000.50<7> " + +# +# Unicode word boundaries with Regions +# +"(?w).*?\b" v "abc<0>defghi" +"(?w).*?\b" v2 "abcdef<0>ghi" +"(?w).*?\b" v3 "abcdefghi" +#"(?w).*?\b" vb "abc<0>defghi" # TODO: bug. Ticket 6073 +#"(?w).*?\b" vb2 "abcdefghi" + + + # . does not match new-lines -"." "\u000a\u000d\u0085\u000c\u2028\u2029<0>X\u000aY" +"." "\u000a\u000d\u0085\u000c\u000b\u2028\u2029<0>X\u000aY" "A." "A\u000a "# no match # \d for decimal digits -"\d*" "<0>0123456789\u0660\u06F9\u0969\u0A66\u1369\u17E2\uFF10\U0001D7CE\U0001D7FFnon-digits" +"\d*" "<0>0123456789\u0660\u06F9\u0969\u0A66\u17E2\uFF10\U0001D7CE\U0001D7FFnon-digits" "\D+" "<0>non digits" "\D*(\d*)(\D*)" "<0>non-digits<1>3456666<2>more non digits" # \Q...\E quote mode "hel\Qlo, worl\Ed" "<0>hello, world" "\Q$*^^(*)?\A\E(a*)" "<0>$*^^(*)?\\A<1>aaaaaaaaaaaaaaa" +"[abc\Q]\r\E]+" "<0>aaaccc]]]\\\\\\\r..." # \Q ... \E escape in a [set] + +# UREGEX_LITERAL - entire pattern is a literal string, no escapes recognized. +# Note that data strings in test cases still get escape processing. +"abc\an\r\E\\abcd\u0031bye" Q "lead<0>abc\\an\\r\\E\\\\abcd\\u0031byeextra" +"case insensitive \\ (l)iteral" Qi "stuff!! <0>cAsE InSenSiTiVE \\\\ (L)ITeral" # \S and \s space characters "\s+" "not_space<0> \t \r \n \u3000 \u2004 \u2028 \u2029xyz" @@ -108,20 +305,20 @@ ".*^(Hello)" " Hello Hello Hello Hello Goodbye"# No Match # $ matches only at end of line, or before a newline preceding the end of line -".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye" -".*?(Goodbye)" "<0>Hello <1>Goodbye Goodbye Goodbye" -".*?(Goodbye)$" "Hello Goodbye> Goodbye Goodbye "# No Match +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)" ZY "<0>Hello <1>Goodbye Goodbye Goodbye" +".*?(Goodbye)$" z "Hello Goodbye> Goodbye Goodbye "# No Match -".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye\n" -".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye\n" -".*?(Goodbye)$" "<0>Hello Goodbye Goodbye <1>Goodbye\r\n" -".*?(Goodbye)$" "Hello Goodbye Goodbye Goodbye\n\n"# No Match +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\r\n" +".*?(Goodbye)$" z "Hello Goodbye Goodbye Goodbye\n\n"# No Match # \Z matches at end of input, like $ with default flags. -".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye" -".*?(Goodbye)" "<0>Hello <1>Goodbye Goodbye Goodbye" -".*?(Goodbye)\Z" "Hello Goodbye> Goodbye Goodbye "# No Match -"here$" "here\nthe end"# No Match +".*?(Goodbye)\Z" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)" ZY "<0>Hello <1>Goodbye Goodbye Goodbye" +".*?(Goodbye)\Z" z "Hello Goodbye> Goodbye Goodbye "# No Match +"here$" z "here\nthe end"# No Match ".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" ".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" @@ -131,12 +328,13 @@ # \z matches only at the end of string. # no special treatment of new lines. # no dependencies on flag settings. -".*?(Goodbye)\z" "<0>Hello Goodbye Goodbye <1>Goodbye" -".*?(Goodbye)\z" "Hello Goodbye Goodbye Goodbye "# No Match -"here$" "here\nthe end"# No Match +".*?(Goodbye)\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye "# No Match +"here$" z "here\nthe end"# No Match -".*?(Goodbye)\z" "Hello Goodbye Goodbye Goodbye\n"# No Match -".*?(Goodbye)\n\z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye\n"# No Match +".*?(Goodbye)\n\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +"abc\z|def" ZY "abc<0>def" # (?# comment) doesn't muck up pattern "Hello (?# this is a comment) world" " <0>Hello world..." @@ -160,6 +358,61 @@ "(x?)*xyz" "<0>xx<1>xyz" # Sligthly wierd, but correct. The "last" time through (x?), # it matches the empty string. +# Set expressions, basic operators and escapes work +# +"[\d]+" "<0>0123abc/.," +"[^\d]+" "0123<0>abc/.," +"[\D]+" "0123<0>abc/.," +"[^\D]+" "<0>0123abc/.," + +"[\s]+" "<0> \tabc/.," +"[^\s]+" " \t<0>abc/.," +"[\S]+" " \t<0>abc/.," +"[^\S]+" "<0> \tabc/.," + +"[\w]+" "<0>abc123 .,;" +"[^\w]+" "abc123<0> .,;" +"[\W]+" "abc123<0> .,;" +"[^\W]+" "<0>abc123 .,;" + +"[\z]+" "abc<0>zzzdef" # \z has no special meaning +"[^\z]+" "<0>abczzzdef" +"[\^]+" "abc<0>^^" +"[^\^]+" "<0>abc^^" + +"[\u0041c]+" "<0>AcAcdef" +"[\U00010002]+" "<0>\ud800\udc02\U00010003" +"[^\U00010002]+" "<0>Hello\x{10002}" +"[\x61b]+" "<0>ababcde" +#"[\x6z]+" "\x06" #TODO: single hex digits should fail +"[\x{9}\x{75}\x{6d6}\x{6ba6}\x{6146B}\x{10ffe3}]+" "<0>\u0009\u0075\u06d6\u6ba6\U0006146B\U0010ffe3abc" + +"[\N{LATIN CAPITAL LETTER TONE SIX}ab\N{VARIATION SELECTOR-70} ]+" "x<0> \u0184\U000E0135 abc" +"[\N{LATIN SMALL LETTER C}-\N{LATIN SMALL LETTER F}]+" "ab<0>cdefghi" + + + +# +# [set expressions], check the precedence of '-', '&', '--', '&&' +# '-' and '&', for compatibility with ICU UnicodeSet, have the same +# precedence as the implicit Union between adjacent items. +# '--' and '&&', for compatibility with Java, have lower precedence than +# the implicit Union operations. '--' and '&&' themselves +# have the same precedence, and group left to right. +# +"[[a-m]-[f-w]p]+" "<0>depfgwxyz" +"[^[a-m]-[f-w]p]+" "dep<0>fgwxyz" + +"[[a-m]--[f-w]p]+" "<0>depfgwxyz" +"[^[a-m]--[f-w]p]+" "de<0>pfgwxyz" + +"[[a-m]&[e-s]w]+" "<0>efmwadnst" +"[^[a-m]&[e-s]w]+" "efmw<0>adnst" + +"[[a-m]&[e-s]]+" "<0>efmadnst" + + + # {min,max} iteration qualifier "A{3}BC" "<0>AAABC" @@ -226,9 +479,43 @@ "ab(?:c|(d?))(\1)" "<0>ab<1><2>e" "ab(?:c|(d?))(\1)" "<0>ab<1><2>" +# Back References that hit/don't hit end +"(abcd) \1" z "abcd abc" +"(abcd) \1" Z "<0><1>abcd abcd" +"(abcd) \1" Z "<0><1>abcd abcd " + +# Case Insensitve back references that hit/don't hit end. +"(abcd) \1" zi "abcd abc" +"(abcd) \1" Zi "<0><1>abcd ABCD" +"(abcd) \1" Zi "<0><1>abcd ABCD " + +# Back references that hit/don't hit boundary limits. + +"(abcd) \1" z "abcd abcd " +"(abcd) \1" Z "<0><1>abcd abcd " +"(abcd) \1" Z "<0><1>abcd abcd " + +"(abcd) \1" zi "abcd abcd " +"(abcd) \1" Zi "<0><1>abcd abcd " +"(abcd) \1" Zi "<0><1>abcd abcd " + +# Back reference that fails match near the end of input without actually hitting the end. +"(abcd) \1" ZL "abcd abd" +"(abcd) \1" ZLi "abcd abd" + +# Back reference to a zero-length match. They are always a successful match. +"ab(x?)cd(\1)ef" "<0>ab<1>cd<2>ef" +"ab(x?)cd(\1)ef" i "<0>ab<1>cd<2>ef" + +# Back refs to capture groups that didn't participate in the match. +"ab(?:(c)|(d))\1" "abde" +"ab(?:(c)|(d))\1" "<0>ab<1>cce" +"ab(?:(c)|(d))\1" i "abde" +"ab(?:(c)|(d))\1" i "<0>ab<1>cce" + # Case Insensitive -"aBc" i "<0>ABC" -"a[^bc]d" i "ABD" +"aBc" i "<0>ABC" +"a[^bc]d" i "ABD" '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>AA" "(?:(?i)a)b" "<0>Ab" @@ -239,15 +526,36 @@ "a b" "ab" "abc " "abc" "abc " "<0>abc " -"ab[cd e]z" "<0>ab z" +"ab[cd e]z" "<0>ab z" "ab\ c" "<0>ab c " "ab c" "<0>ab c " "ab c" x "ab c " "ab\ c" x "<0>ab c " +# +# Pattern Flags +# +"(?u)abc" "<0>abc" +"(?-u)abc" "<0>abc" + +# +# \c escapes (Control-whatever) +# +"\cA" "<0>\u0001" +"\ca" "<0>\u0001" +"\c\x" "<0>\u001cx" + #Multi-line mode -'b\s^' m "a\nb\n" +'b\s^' m "a\nb\n" +"(?m)^abc$" "abc \n abc\n<0>abc\nabc" +"(?m)^abc$" 2 "abc \n abc\nabc\n<0>abc" +"^abc$" 2 "abc \n abc\nabc\nabc" + +# Empty and full range +"[\u0000-\U0010ffff]+" "<0>abc\u0000\uffff\U00010000\U0010ffffzz" +"[^\u0000-\U0010ffff]" "abc\u0000\uffff\U00010000\U0010ffffzz" +"[^a--a]+" "<0>abc\u0000\uffff\U00010000\U0010ffffzz" # Free-spacing mode "a b c # this is a comment" x "<0>abc " @@ -296,8 +604,8 @@ "abc.*$" "<0>abcdef" "abc(.*)" "<0>abc<1>def" "abc(.*)" "<0>abc<1>" -"abc.*" "<0>abc\ndef" -"abc.*" s "<0>abc\ndef" +"abc.*" "<0>abc\ndef" +"abc.*" s "<0>abc\ndef" "abc.*$" s "<0>abc\ndef" "abc.*$" "abc\ndef" "abc.*$" m "<0>abc\ndef" @@ -337,9 +645,16 @@ "ab\x09w" "<0>ab\u0009w" "ab\xabcdc" "<0>ab\u00abcdc" "ab\x{abcd}c" "<0>ab\uabcdc" -"ab\x{101234}c" "<0>ab\U00101234c" +"ab\x{101234}c" "<0>ab\U00101234c" "abα" "<0>abα" +# +# Octal Escaping. This conforms to Java conventions, not Perl. +"\0101\00\03\073\0154\01442" "<0>A\u0000\u0003\u003b\u006c\u0064\u0032" +"\0776" "<0>\u003f\u0036" # overflow, the 6 is literal. +"\0376xyz" "<0>\u00fexyz" +"\08" E "<0>\u00008" +"\0" E "x" # # \u Surrogate Pairs @@ -348,14 +663,492 @@ "\ud800\udc00*" "<0>\U00010000\U00010000\U00010000\U00010001" "\ud800\ud800\udc00" "<0>\ud800\U00010000\U00010000\U00010000\U00010001" "(\ud800)(\udc00)" "\U00010000" +"\U00010001+" "<0>\U00010001\U00010001\udc01" + +# +# hitEnd with find() +# +"abc" Z "aa<0>abc abcab" +"abc" 2Z "aaabc <0>abcab" +"abc" 3z "aa>abc abcab" + +# +# \ escaping +# +"abc\jkl" "<0>abcjkl" # escape of a non-special letter is just itself. +"abc[ \j]kl" "<0>abcjkl" + +# +# Bug xxxx +# +"(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61.789+71:81" + + +# +# A random, complex, meaningless pattern that should at least compile +# +"(?![^\\G)(?![^|\]\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\037\uECB3\u3D9A\x31\|\[^\016\r\{\,\uA29D\034\02[\02-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\0114\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0>abc" + +# +# Bug 3225 + +"1|9" "<0>1" +"1|9" "<0>9" +"1*|9" "<0>1" +"1*|9" "<0>9" + +"(?:a|ac)d" "<0>acd" +"a|ac" "<0>ac" +# +# Bug 3320 +# +"(a([^ ]+)){0,} (c)" "<0><1>a<2>b <3>c " +"(a([^ ]+))* (c)" "<0><1>a<2>b <3>c " # +# Bug 3436 +# +"(.*?) *$" "<0><1>test " + +# +# Bug 4034 +# +"\D" "<0>ABC\u00ffDEF" +"\d" "ABC\u00ffDEF" +"\D" "<0>\u00ffDEF" +"\d" "\u00ffDEF" +"\D" "123<0>\u00ffDEF" +"\D" "<0>\u0100DEF" +"\D" "123<0>\u0100DEF" + +# +#bug 4024, new line sequence handling +# +"(?m)^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)^" 2 "AA\u000d\u000a<0>BB\u000d\u000aCC\u000d\u000a" +"(?m)^" 3 "AA\u000d\u000aBB\u000d\u000a<0>CC\u000d\u000a" +"(?m)^" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"(?m)$" "AA<0>\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)$" 2 "AA\u000d\u000aBB<0>\u000d\u000aCC\u000d\u000a" +"(?m)$" 3 "AA\u000d\u000aBB\u000d\u000aCC<0>\u000d\u000a" +"(?m)$" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0>" +"(?m)$" 5 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"$" "AA\u000d\u000aBB\u000d\u000aCC<0>\u000d\u000a" +"$" 2 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0>" +"$" 3 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"$" "\u000a\u0000a<0>\u000a" +"$" 2 "\u000a\u0000a\u000a<0>" +"$" 3 "\u000a\u0000a\u000a" + +"$" "<0>" +"$" 2 "" + +"$" "<0>\u000a" +"$" 2 "\u000a<0>" +"$" 3 "\u000a" + +"^" "<0>" +"^" 2 "" + +"\Z" "<0>" +"\Z" 2 "" +"\Z" 2 "\u000a<0>" +"\Z" "<0>\u000d\u000a" +"\Z" 2 "\u000d\u000a<0>" + + +# No matching ^ at interior new-lines if not in multi-line mode. +"^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"^" 2 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +# +# Dot-matches-any mode, and stopping at new-lines if off. +# +"." "<0>123\u000aXYZ" +"." 2 "1<0>23\u000aXYZ" +"." 3 "12<0>3\u000aXYZ" +"." 4 "123\u000a<0>XYZ" # . doesn't match newlines +"." 4 "123\u000b<0>XYZ" +"." 4 "123\u000c<0>XYZ" +"." 4 "123\u000d<0>XYZ" +"." 4 "123\u000d\u000a<0>XYZ" +"." 4 "123\u0085<0>XYZ" +"." 4 "123\u2028<0>XYZ" +"." 4 "123\u2029<0>XYZ" +"." 4s "123<0>\u000aXYZ" # . matches any +"." 4s "123<0>\u000bXYZ" +"." 4s "123<0>\u000cXYZ" +"." 4s "123<0>\u000dXYZ" +"." 4s "123<0>\u000d\u000aXYZ" +"." 4s "123<0>\u0085XYZ" +"." 4s "123<0>\u2028XYZ" +"." 4s "123<0>\u2029XYZ" +".{6}" "123\u000a\u000dXYZ" +".{6}" s "<0>123\u000a\u000dXY" + + +# +# Ranges +# +".*" "abc<0>defghi" +"a" "aaa<0>aaaaaa" +"a" 2 "aaaa<0>aaaaa" +"a" 3 "aaaaa<0>aaaa" +"a" 4 "aaaaaaaaa" +"a" "aaa<0>aaaaaa" + +# +# [set] parsing, systematically run through all of the parser states. +# +# +"[def]+" "abc<0>ddeeffghi" # set-open +"[^def]+" "<0>abcdefghi" +"[:digit:]+" "abc<0>123def" +"[:^digit:]+" "<0>abc123def" +"[\u005edef]+" "abc<0>de^fghi" + +"[]]+" "abc<0>]]][def" # set-open2 +"[^]]+" "<0>abc]]][def" + +"[:Lu:]+" "abc<0>ABCdef" # set-posix +"[:Lu]+" "abc<0>uL::Lu" +"[:^Lu]+" "abc<0>uL:^:Lu" +"[:]+" "abc<0>:::def" +"[:whats this:]" E " " +"[--]+" dE "-------" + +"[[nested]]+" "xyz[<0>nnetsteed]abc" #set-start +"[\x{41}]+" "CB<0>AAZYX" +"[\[\]\\]+" "&*<0>[]\\..." +"[*({<]+" "^&<0>{{(<<*)))" + + +"[-def]+" "abc<0>def-ef-dxyz" # set-start-dash +"[abc[--def]]" E " " + +"[x[&def]]+" "abc<0>def&ghi" # set-start-amp +"[&& is bad at start]" E " " + +"[abc" E " " # set-after-lit +"[def]]" "abcdef" +"[def]]" "abcde<0>f]]" + +"[[def][ghi]]+" "abc]<0>defghi[xyz" # set-after-set +"[[def]ghi]+" "abc]<0>defghi[xyz" +"[[[[[[[[[[[abc]" E " " +"[[abc]\p{Lu}]+" "def<0>abcABCxyz" + +"[d-f]+" "abc<0>defghi" # set-after-range +"[d-f[x-z]]+" "abc<0>defxyzzzgw" +"[\s\d]+" "abc<0> 123def" +"[d-f\d]+" "abc<0>def123ghi" +"[d-fr-t]+" "abc<0>defrstuvw" + +"[abc--]" E " " # set-after-op +"[[def]&&]" E " " +"[-abcd---]+" "<0>abc--" #[-abcd]--[-] +"[&abcd&&&ac]+" "b<0>ac&&cad" #[&abcd]&&[&ac] + +"[[abcd]&[ac]]+" "b<0>acacd" # set-set-amp +"[[abcd]&&[ac]]+" "b<0>acacd" +"[[abcd]&&ac]+" "b<0>acacd" +"[[abcd]&ac]+" "<0>bacacd&&&" + +"[abcd&[ac]]+" "<0>bacacd&&&" #set-lit-amp +"[abcd&&[ac]]+" "b<0>acacd" +"[abcd&&ac]+" "b<0>acacd" + +"[[abcd]-[ac]]+" "a<0>bdbdc" # set-set-dash +"[[abcd]--[ac]]+" "a<0>bdbdc" +"[[abcd]--ac]+" "a<0>bdbdc" +"[[abcd]-ac]+" "<0>bacacd---" + +"[a-d--[b-c]]+" "b<0>adadc" # set-range-dash +"[a-d--b-c]+" "b<0>adadc" +"[a-d-[b-c]]+" "<0>bad-adc" +"[a-d-b-c]+" "<0>bad-adc" +"[\w--[b-c]]+" "b<0>adadc" +"[\w--b-c]+" "b<0>adadc" +"[\w-[b-c]]+" "<0>bad-adc" +"[\w-b-c]+" "<0>bad-adc" + +"[a-d&&[b-c]]+" "a<0>bcbcd" # set-range-amp +"[a-d&&b-c]+" "a<0>bcbcd" +"[a-d&[b-c]]+" "<0>abc&bcd" +"[a-d&b-c]+" "<0>abc&bcd" + +"[abcd--bc]+" "b<0>addac" # set-lit-dash +"[abcd--[bc]]+" "b<0>addac" +"[abcd-[bc]]+" "<0>bad--dacxyz" +"[abcd-]+" "<0>bad--dacxyz" + +"[abcd-\s]+" E "xyz<0>abcd --xyz" # set-lit-dash-esc +"[abcd-\N{LATIN SMALL LETTER G}]+" "xyz-<0>abcdefghij-" +"[bcd-\{]+" "a<0>bcdefyz{|}" + +"[\p{Ll}]+" "ABC<0>abc^&*&" # set-escape +"[\P{Ll}]+" "abc<0>ABC^&*&xyz" +"[\N{LATIN SMALL LETTER Q}]+" "mnop<0>qqqrst" +"[\sa]+" "cb<0>a a (*&" +"[\S]+" " <0>hello " +"[\w]+" " <0>hello_world! " +"[\W]+" "a<0> *$%#,hello " +"[\d]+" "abc<0>123def" +"[\D]+" "123<0>abc567" +"[\$\#]+" "123<0>$#$#\\" + +# +# Try each of the Java compatibility properties. +# These are checked here, while normal Unicode properties aren't, because +# these Java compatibility properties are implemented directly by regexp, while other +# properties are handled by ICU's Property and UnicodeSet APIs. +# +# These tests are only to verify that the names are recognized and the +# implementation isn't dead. They are not intended to verify that the +# function defintions are 100% correct. +# +"[:InBasic Latin:]+" "ΓΔΕΖΗΘ<0>hello, world.ニヌネノハバパ" +"[:^InBasic Latin:]+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InBasicLatin}+" "ΓΔΕΖΗΘ<0>hello, world.ニヌネノハバパ" +"\P{InBasicLatin}+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InGreek}+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InCombining Marks for Symbols}" "<0>\u20d0" +"\p{Incombiningmarksforsymbols}" "<0>\u20d0" + + +"\p{javaDefined}+" "\uffff<0>abcd\U00045678" +"\p{javaDigit}+" "abc<0>1234xyz" +"\p{javaIdentifierIgnorable}+" "abc<0>\u0000\u000e\u009fxyz" +"\p{javaISOControl}+" "abc<0>\u0000\u000d\u0083xyz" +"\p{javaJavaIdentifierPart}+" "#@!<0>abc123_$;" +"\p{javaJavaIdentifierStart}+" "123\u0301<0>abc$_%^&" +"\p{javaLetter}+" "123<0>abcDEF&*()(" +"\p{javaLetterOrDigit}+" "$%^&*<0>123abcகஙசஜஞ☺♘♚☔☎♬⚄⚡" +"\p{javaLowerCase}+" "ABC<0>def&^%#:=" +"\p{javaMirrored}+" "ab$%<0>(){}[]xyz" +"\p{javaSpaceChar}+" "abc<0> \u00ao\u2028!@#" +"\p{javaSupplementaryCodePoint}+" "abc\uffff<0>\U00010000\U0010ffff\u0000" +"\p{javaTitleCase}+" "abCE<0>Džῌᾨ123" +"\p{javaUnicodeIdentifierStart}+" "123<0>abcⅣ%^&&*" +"\p{javaUnicodeIdentifierPart}+" "%&&^<0>abc123\u0301\u0002..." +"\p{javaUpperCase}+" "abc<0>ABC123" +"\p{javaValidCodePoint}+" "<0>\u0000abc\ud800 unpaired \udfff |\U0010ffff" +"\p{javaWhitespace}+" "abc\u00a0\u2007\u202f<0> \u0009\u001c\u001f\u202842" +"\p{all}+" "<0>123\u0000\U0010ffff" +"\P{all}+" "123\u0000\U0010ffff" + +# [:word:] is implemented directly by regexp. Not a java compat property, but PCRE and others. + +"[:word:]+" ".??$<0>abc123ΓΔΕΖΗ_%%%" +"\P{WORD}+" "<0>.??$abc123ΓΔΕΖΗ_%%%" + +# +# Errors on unrecognized ASCII letter escape sequences. +# +"[abc\Y]+" "<0>abcY" +"[abc\Y]+" eE "<0>abcY" + +"(?:a|b|c|\Y)+" "<0>abcY" +"(?:a|b|c|\Y)+" eE "<0>abcY" + +"\Q\Y\E" e "<0>\\Y" + +# +# Reported problem +# +"[a-\w]" E "x" + +# +# Bug 4045 +# +"A*" "<0>AAAA" +"A*" 2 "AAAA<0>" +"A*" 3 "AAAA" +"A*" 4 "AAAA" +"A*" 5 "AAAA" +"A*" 6 "AAAA" +"A*" "<0>" +"A*" 2 "" +"A*" 3 "" +"A*" 4 "" +"A*" 5 "" + +# +# Bug 4046 +# +"(?m)^" "<0>AA\u000dBB\u000dCC\u000d" +"(?m)^" 2 "AA\u000d<0>BB\u000dCC\u000d" +"(?m)^" 3 "AA\u000dBB\u000d<0>CC\u000d" +"(?m)^" 4 "AA\u000dBB\u000dCC\u000d" +"(?m)^" 5 "AA\u000dBB\u000dCC\u000d" +"(?m)^" 6 "AA\u000dBB\u000dCC\u000d" + +"(?m)^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)^" 2 "AA\u000d\u000a<0>BB\u000d\u000aCC\u000d\u000a" +"(?m)^" 3 "AA\u000d\u000aBB\u000d\u000a<0>CC\u000d\u000a" +"(?m)^" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +# +# Bug 4059 +# +"\w+" "<0>イチロー" +"\b....\b." "<0>イチロー?" + + +# +# Bug 4058 ICU Unicode Set patterns have an odd feature - +# A $ as the last character before the close bracket means match +# a \uffff, which means off the end of the string in transliterators. +# Didn't make sense for regular expressions, and is now fixed. +# +"[\$](P|C|D);" "<0>$<1>P;" +"[$](P|C|D);" "<0>$<1>P;" +"[$$](P|C|D);" "<0>$<1>P;" + +# +# bug 4888 Flag settings lost in some cases. +# +"((a){2})|(#)" is "no" +"((a){2})|(#)" is "<0><1>a<2>a#" +"((a){2})|(#)" is "a<0><3>#" + +"((a|b){2})|c" is "<0>c" +"((a|b){2})|c" is "<0>C" +"((a|b){2})|c" s "C" + +# +# bug 5617 ZWJ \u200d shoudn't cause word boundaries +# +".+?\b" "<0> \u0935\u0915\u094D\u200D\u0924\u0947 " +".+?\b" 2 " <0>\u0935\u0915\u094D\u200D\u0924\u0947 " +".+?\b" 3 " \u0935\u0915\u094D\u200D\u0924\u0947 " + +# +# bug 5386 "^.*$" should match empty input +# +"^.*$" "<0>" +"^.*$" m "<0>" +"^.*$" "<0>\n" +"(?s)^.*$" "<0>\n" + +# +# bug 5386 Empty pattern and empty input should match. +# +"" "<0>abc" +"" "<0>" + +# +# bug 5386 Range upper and lower bounds can be equal +# +"[a-a]" "<0>a" + +# +# bug 5386 $* should not fail, should match empty string. +# +"$*" "<0>abc" + +# +# bug 5386 \Q ... \E escaping problem +# +"[a-z\Q-$\E]+" "QE<0>abc-def$." + +# More reported 5386 Java comaptibility failures +# +"[^]*abb]*" "<0>kkkk" +"\xa" "huh" # Java would like to be warned. +"^.*$" "<0>" + +# +# bug 5386 Empty left alternation should produce a zero length match. +# +"|a" "<0>a" +"$|ab" "<0>ab" +"$|ba" "ab<0>" + +# +# bug 5386 Java compatibility for set expressions +# +"[a-z&&[cde]]+" "ab<0>cdefg" + +# +# bug 6019 matches() needs to backtrack and check for a longer match if the +# first match(es) found don't match the entire input. +# +"a?|b" "<0>b" +"a?|b" M "<0>b" +"a?|.*?u|stuff|d" M "<0>stuff" +"a?|.*?(u)|stuff|d" M "<0>stuff<1>u" +"a+?" "<0>aaaaaaaaaaaaa" +"a+?" M "<0>aaaaaaaaaaaaa" + +# +# Bug 7724. Expression to validate zip codes. +# +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "<0><1>94040<2>-3344" +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "94040-0000" +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "00000-3344" + +# +# Bug 8666. Assertion failure on match, bad operand to JMP_SAV_X opcode. +# +"((.??)+|A)*" "<0><1><2>AAAAABBBBBCCCCCDDDDEEEEE" + +# +# Bug 8826. Incorrect results with case insensitive matches. +# +"AS(X)" i "aßx" +"AS.*" i "aßx" # Expansion of sharp s can't split between pattern terms. +"ASßS" i "<0>aßß" # All one literal string, does match. +"ASß{1}S" i "aßß" # Pattern with terms, no match. +"aßx" i "<0>assx" +"aßx" i "<0>ASSX" +"aßx" i "<0>aßx" +"ASS(.)" i "<0>aß<1>x" + +# Case Insensitive, probe some corner cases. +"ass+" i "aß" # Second 's' in pattern is qualified, can't combine with first. +"as+" i "aß" +"aßs" i "as" # Can't match half of a ß +"aß+" i "<0>asssssssss" +"aß+" i "<0>assßSssSSSs" +"a(ß?)+" i "<0>assssssss<1>s" +"a(ß?)+" i "<0>a<1>zzzzzzzzs" + +"\U00010400" i "<0>\U00010428" # case folded supplemental code point. + +"sstuff" i "<0>ßtuff" # exercise optimizations on what chars can start a match. +"sstuff" i "s<0>ßtuff" # exercise optimizations on what chars can start a match. +"ßtuff" i "s<0>sstuff" +"ßtuff" i "s<0>Sstuff" + +"a(..)\1" i "<0>A<1>bcBCdef" +"(ß)\1" i "aa<0><1>ssßzz" # Case insensitive back reference +"..(.)\1" i "<0>aa<1>ßss" +"ab(..)\1" i "xx<0>ab<1>ssßss" + +" (ss) ((\1.*)|(.*))" i "<0> <1>ss <2><4>sß" # The back reference 'ss' must not match in 'sß' + +# Bug 9057 +# \u200c and \u200d should be word characters. +# +"\w+" " <0>abc\u200cdef\u200dghi " +"\w+" i " <0>abc\u200cdef\u200dghi " +"[\w]+" " <0>abc\u200cdef\u200dghi " +"[\w]+" i " <0>abc\u200cdef\u200dghi " + +# Bug 9283 +# uregex_open fails for look-behind assertion + case-insensitive + +"(ab)?(?<=ab)cd|ef" i "<0><1>abcd" + # Random debugging, Temporary # -#"^(?:a?b?)*$" "a--" -"^(?:a?b?)*$" "a--" +#"^(?:a?b?)*$" "a--" "This is a string with (?:one |two |three )endings" "<0>This is a string with two endings" "((?:a|b|c)whoop-dee-do) | [jkl]|zed" "x" @@ -495,7 +1288,7 @@ "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>ftp://ftp.blah.co.uk:2828/blah%20blah.gif" "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>https://blah.gov/blah-blah.as" "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "www.blah.com" -"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "http://www.blah.com/I have spaces!" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "http://www.blah.com/I have spaces!" "^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "ftp://blah_underscore/[nope]" "^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>12/01/2002" "^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>12/01/2002 12:32:10" @@ -773,18 +1566,18 @@ "^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "10.0.5.4" "^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "192.168.0.1" "^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "my ip address" -#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.com" # TODO: \w in pattern -#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo-foo.com.au" # TODO: \w in pattern -#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.foo.info" # TODO: \w in pattern -#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@.com" # TODO: \w in pattern -#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@foo..com" # TODO: \w in pattern -#"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@me@.com" # TODO: \w in pattern -#"/\*[\d\D]*?\*/" G "<0>/* my comment */" -#"/\*[\d\D]*?\*/" G "<0>/* my multiline comment */" -#"/\*[\d\D]*?\*/" G "<0>/* my nested comment */" -#"/\*[\d\D]*?\*/" "*/ anything here /*" -#"/\*[\d\D]*?\*/" "anything between 2 seperate comments" -#"/\*[\d\D]*?\*/" "\* *\" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo-foo.com.au" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.foo.info" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@.com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@foo..com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@me@.com" +"/\*[\d\D]*?\*/" G "<0>/* my comment */" +"/\*[\d\D]*?\*/" G "<0>/* my multiline comment */" +"/\*[\d\D]*?\*/" G "<0>/* my nested comment */" +"/\*[\d\D]*?\*/" "*/ anything here /*" +"/\*[\d\D]*?\*/" "anything between 2 seperate comments" +"/\*[\d\D]*?\*/" "\* *\" "/\*[\p{N}\P{N}]*?\*/" G "<0>/* my comment */" "/\*[\p{N}\P{N}]*?\*/" G "<0>/* my multiline comment */" "/\*[\p{N}\P{N}]*?\*/" G "<0>/* my nested comment */" @@ -800,9 +1593,9 @@ '^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' G "<0>blah@[10.0.0.1]" '^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' G "<0>a@b.c" '^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' "non@match@." -#"^\d{9}[\d|X]$" G "<0>1234123412" -#"^\d{9}[\d|X]$" G "<0>123412341X" -#"^\d{9}[\d|X]$" "not an isbn" +"^\d{9}[\d|X]$" G "<0>1234123412" +"^\d{9}[\d|X]$" G "<0>123412341X" +"^\d{9}[\d|X]$" "not an isbn" "^\d{9}(\d|X)$" G "<0>1234123412" "^\d{9}(\d|X)$" G "<0>123412341X" "^\d{9}(\d|X)$" "not an isbn" @@ -870,12 +1663,12 @@ "\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "12 123 1234" "\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "(012) 123/1234" "\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "(012) 123 12345" -#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob-smith@foo.com" # TODO: \w in pattern -#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob.smith@foo.com" # TODO: \w in pattern -#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob_smith@foo.com" # TODO: \w in pattern -#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "-smith@foo.com" # TODO: \w in pattern -#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" ".smith@foo.com" # TODO: \w in pattern -#"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "smith@foo_com" # TODO: \w in pattern +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob-smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob.smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob_smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "-smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" ".smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "smith@foo_com" "^(?=.*\d).{4,8}$" G "<0>1234" "^(?=.*\d).{4,8}$" G "<0>asdf1234" "^(?=.*\d).{4,8}$" G "<0>asp123" @@ -989,7 +1782,7 @@ "^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "$12,3456.01" "^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "12345" "^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "$1.234" -"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>C:\\temp\\this allows spaces\\web.config" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>C:\\temp\\this allows spaces\\web.config" "([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>\\\\Andromeda\\share\\file name.123" "([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" "tz:\temp\ fi*le?na:m.doc" "([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" "\\Andromeda\share\filename.a" @@ -1020,24 +1813,24 @@ "^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "qqqBFDB4D31-3E35-4DAB-AFCA-5E6E5C8F61EA" "^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "BFDB4D31-3E-4DAB-AFCA-5E6E5C8F61EA" "^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "BFDB4D31-3E35-4DAB-AF" -#"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>12.345-678" # TODO: \x not implemented. -#"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>23.345-123" -#"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>99.999" -#"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "41222-222" -#"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "3.444-233" -#"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "43.324444" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>12.345-678" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>23.345-123" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>99.999" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "41222-222" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "3.444-233" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "43.324444" "^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>12.345-678" "^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>23.345-123" "^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>99.999" "^\d{2}(\u002e)(\d{3})(-\d{3})?$" "41222-222" "^\d{2}(\u002e)(\d{3})(-\d{3})?$" "3.444-233" "^\d{2}(\u002e)(\d{3})(-\d{3})?$" "43.324444" -#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\file.txt" -#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\folder\sub folder\file.txt" -#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>\\network\folder\file.txt" # TODO: \w in pattern -#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:" # TODO: \w in pattern -#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:\file.xls" # TODO: \w in pattern -#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "folder.txt" # TODO: \w in pattern +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\file.txt" # TODO: debug +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\folder\sub folder\file.txt" # TODO: debug +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>\\network\folder\file.txt" # TODO: debug +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:" +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:\file.xls" +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "folder.txt" "^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>my.domain.com" "^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>regexlib.com" "^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>big-reg.com" @@ -1079,12 +1872,12 @@ "^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "1-555-5555" "^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "15553333" "^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "0-561-555-1212" -#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>" G "<0>" # TODO: \w in pattern -#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>" G "<0>" # TODO: \w in pattern -#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>" "" # TODO: \w in pattern -#"<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>" "The drity brown fox stank like" # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' G '<0>' +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' G '<0>" # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' '' # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' "The drity brown fox stank like" "^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>1:00 AM" "^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>12:00 PM" "^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>1:00am" @@ -1309,9 +2102,9 @@ "^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "10.57.98.23." "]*[^/])>" G '<0>' "]*[^/])>" '' -#"" G "<0>" -#"" G "<0>" -#"" "this is a comment" +"" G "<0>" +"" G "<0>" +"" "this is a comment" "" G "<0>" "" G "<0>" "" "this is a comment" @@ -1323,8 +2116,8 @@ "(\{\\f\d*)\\([^;]+;)" G "<0>{\\f1\\fswiss\\fcharset0\\fprq2{\\*\\panose 020b0604020202020204}Arial;" "(\{\\f\d*)\\([^;]+;)" G "{\\f" "(\{\\f\d*)\\([^;]+;)" "{f0fs20 some text}" -#"" G "<0>space" # TODO: \w in pattern -#"" "this is not a tag" # TODO: \w in pattern +#"" G '<0>space' # TODO: Can't quote this pattern with the test syntax! +#"" "this is not a tag" "^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>12/30/2002" "^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>01/12/1998 13:30" "^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>01/28/2002 22:35:00" @@ -1400,10 +2193,10 @@ "^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "bad.bad.gif" "^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "slash\gif." "<[^>\s]*\bauthor\b[^>]*>" G '<0>' -#"<[^>\s]*\bauthor\b[^>]*>" G "<0>" -#"<[^>\s]*\bauthor\b[^>]*>" G '<0>' +"<[^>\s]*\bauthor\b[^>]*>" G "<0>" +# "<[^>\s]*\bauthor\b[^>]*>" G '<0>' #Debug should work "<[^> ]*\bauthor\b[^>]*>" G "<0>" -"<[^> ]*\bauthor\b[^>]*>" G '<0>' +"<[^> ]*\bauthor\b[^>]*>" G '<0>' "<[^>\s]*\bauthor\b[^>]*>" "" "<[^>\s]*\bauthor\b[^>]*>" "" "<[^>\s]*\bauthor\b[^>]*>" "author" @@ -1439,15 +2232,15 @@ "(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" "0" "(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" "0.0" "(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" ".0" -#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" G "<0>Sacramento" #TODO: Octal -#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" G "<0>San Francisco" -#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" G "<0>San Luis Obispo" -#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanFrancisco" -#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanLuisObispo" -#"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "San francisco" -#"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" -#"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" -#"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" G "<0>Sacramento" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "<0><2>San Francisco" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "<0><3>San Luis Obispo" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanFrancisco" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanLuisObispo" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "San francisco" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" "^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" "^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" "^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" @@ -1496,15 +2289,15 @@ "^((0[1-9])|(1[0-2]))\/(\d{2})$" G "<0>01/04" "^((0[1-9])|(1[0-2]))\/(\d{2})$" "13/03" "^((0[1-9])|(1[0-2]))\/(\d{2})$" "10/2003" -#"]*>[\w|\t|\r|\W]*" G "<0>" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" "--" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" "A-Z][a-z]+" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" G "<0>strFirstName" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" G "<0>intAgeInYears" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" G "<0>Where the Wild Things Are" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" "123" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" "abc" # TODO: \w in pattern -#"]*>[\w|\t|\r|\W]*" "this has no caps in it" # TODO: \w in pattern +"]*>[\w|\t|\r|\W]*" G '<0>' +"]*>[\w|\t|\r|\W]*" "--" +"]*>[\w|\t|\r|\W]*" "A-Z][a-z]+" +#"]*>[\w|\t|\r|\W]*" G "<0>strFirstName" # Test Case damaged? +#"]*>[\w|\t|\r|\W]*" G "<0>intAgeInYears" # Test Case damaged? +#"]*>[\w|\t|\r|\W]*" G "<0>Where the Wild Things Are" # Test Case damaged? +"]*>[\w|\t|\r|\W]*" "123" +"]*>[\w|\t|\r|\W]*" "abc" +"]*>[\w|\t|\r|\W]*" "this has no caps in it" "(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-0.050" "(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-5.000" "(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-5" @@ -1539,12 +2332,12 @@ "^.{4,8}$" "asd" "^.{4,8}$" "123" "^.{4,8}$" "asdfe12345" -#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com" # TODO: \w in pattern -#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com.au" -#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.au" # TODO: \w in pattern -#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word" # TODO: \w in pattern -#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word@" # TODO: \w in pattern -#"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "@word" # TODO: \w in pattern +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com.au" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.au" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word@" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "@word" "^\d{5}-\d{4}$" G "<0>22222-3333" "^\d{5}-\d{4}$" G "<0>34545-2367" "^\d{5}-\d{4}$" G "<0>56334-2343" @@ -1609,22 +2402,22 @@ "^[12345]$" "6" "^[12345]$" "-1" "^[12345]$" "abc" -#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@aol.com" # TODO: \w in pattern -#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@wrox.co.uk" # TODO: \w in pattern -#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@domain.info" # TODO: \w in pattern -#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "a@b" # TODO: \w in pattern -#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "notanemail" # TODO: \w in pattern -#"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "joe@@." # TODO: \w in pattern +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@aol.com" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@wrox.co.uk" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@domain.info" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "a@b" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "notanemail" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "joe@@." "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>joe@aol.com" "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>ssmith@aspalliance.com" "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>a@b.cc" "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@123aspx.com" "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@web.info" "^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@company.co.uk" -#"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>joe@aol.com" # TODO: \w in pattern -#"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>a@b.c" # TODO: \w in pattern -#"[\w-]+@([\w-]+\.)+[\w-]+" "asdf" # TODO: \w in pattern -#"[\w-]+@([\w-]+\.)+[\w-]+" "1234" # TODO: \w in pattern +"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>joe@aol.com" +"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>a@b.c" +"[\w-]+@([\w-]+\.)+[\w-]+" "asdf" +"[\w-]+@([\w-]+\.)+[\w-]+" "1234" "\d{4}-?\d{4}-?\d{4}-?\d{4}" G "<0>1234-1234-1234-1234" "\d{4}-?\d{4}-?\d{4}-?\d{4}" G "<0>1234123412341234" "\d{4}-?\d{4}-?\d{4}-?\d{4}" "1234123412345"