X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b331163bffd790ced0e88b73f44f86d49ccc48a5..f3c0d7a59d99c2a94c6b8822291f0e42be3773c9:/icuSources/test/testdata/regextst.txt diff --git a/icuSources/test/testdata/regextst.txt b/icuSources/test/testdata/regextst.txt index 15d13bf1..6873f483 100644 --- a/icuSources/test/testdata/regextst.txt +++ b/icuSources/test/testdata/regextst.txt @@ -1,4 +1,6 @@ -# Copyright (c) 2001-2015 International Business Machines +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (c) 2001-2015 International Business Machines # Corporation and others. All Rights Reserved. # # file: @@ -1313,6 +1315,45 @@ "(?<=((0123456789){100000}){3000})abc" E "abc" +# Bug 11507 Capture of an unpaired surrogate shouldn't allow a back reference to +# match half of a surrogate pair, but only another unpaired surrogate. +# +"pre(.)post\1" "pre\ud800post\ud800\udc00" +"pre(.)post\1" "<0>pre<1>\ud800post\ud800 fin" +"pre(.)post\1" i "pre\ud800post\ud800\udc00" # case insensitive backrefs take a different code path +"pre(.)post\1" i "<0>pre<1>\ud800post\ud800 fin" + +# Bug 11554 +# +# Maximum match length computation was assuming UTF-16. +# Used in look-behind matches to constrain how far back to look. + +"(?<=a\x{100000})spam" "***a\x{100000}<0>spam**" +"(?<=aą)spam" "**aą<0>spam**" +"(?<=ąabc)spam" "**ąabc<0>spam**" + +"(?<=a\x{100000})spam" "***a\x{100001}spam**" +"(?<=aą)spam" "**bąspam**" +"(?<=ąabc)spam" "**ąabxspam**" + +# with negative look-behind + +"(?spam**" +"(?spam**" +"(?spam**" + +# Bug #12930 +# +# Minimum Match Length computation, int32_t overflow on an empty set in the pattern. +# The empty set, with no match possible, has a min match length of INT32_MAX. +# Was incremented subsequently. Caused assertion failure on pattern compile. + +"[^\u0000-\U0010ffff]bc?" "bc no match" +"[^\u0000-\U0010ffff]?bc?" "<0>bc has a match" # Random debugging, Temporary