X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/ef6cf650f4a75c3f97de06b51fa104f2069b9ea2..a62d09fcbc8ca9da27887e04112ec143e19b1caf:/icuSources/test/intltest/rbbitst.cpp diff --git a/icuSources/test/intltest/rbbitst.cpp b/icuSources/test/intltest/rbbitst.cpp index 9f585147..d33837a8 100644 --- a/icuSources/test/intltest/rbbitst.cpp +++ b/icuSources/test/intltest/rbbitst.cpp @@ -2065,14 +2065,18 @@ RBBICharMonkey::RBBICharMonkey() { fEmojiBaseSet = new UnicodeSet(UnicodeString( - "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443" - "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483" - "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647" - "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status); + "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C2-\\U0001F3C4\\U0001F3C7\\U0001F3CA-\\U0001F3CC" + "\\U0001F442-\\U0001F443\\U0001F446-\\U0001F450\\U0001F466-\\U0001F478\\U0001F47C" + "\\U0001F481-\\U0001F483\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F574-\\U0001F575\\U0001F57A\\U0001F590\\U0001F595-\\U0001F596" + "\\U0001F645-\\U0001F647\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F6CC" + "\\U0001F918-\\U0001F91E\\U0001F926\\U0001F930\\U0001F933-\\U0001F939\\U0001F93C-\\U0001F93E]"), status); fEmojiModifierSet = new UnicodeSet(0x0001F3FB, 0x0001F3FF); fZWJSet = new UnicodeSet(0x200D, 0x200D); - fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2764\\U0001F308\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8]"), status); + fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2695-\\u2696\\u2708\\u2764" + "\\U0001F308\\U0001F33E\\U0001F373\\U0001F393\\U0001F3A4\\U0001F3A8\\U0001F3EB\\U0001F3ED" + "\\U0001F466-\\U0001F469\\U0001F48B\\U0001F4BB-\\U0001F4BC\\U0001F527\\U0001F52C\\U0001F5E8" + "\\U0001F680\\U0001F692]"), status); fSets = new UVector(status); fSets->addElement(fCRLFSet, status); @@ -2354,14 +2358,18 @@ RBBIWordMonkey::RBBIWordMonkey() fExtendSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"), status); fEBaseSet = new UnicodeSet(UnicodeString( - "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443" - "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483" - "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647" - "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status); + "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C2-\\U0001F3C4\\U0001F3C7\\U0001F3CA-\\U0001F3CC" + "\\U0001F442-\\U0001F443\\U0001F446-\\U0001F450\\U0001F466-\\U0001F478\\U0001F47C" + "\\U0001F481-\\U0001F483\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F574-\\U0001F575\\U0001F57A\\U0001F590\\U0001F595-\\U0001F596" + "\\U0001F645-\\U0001F647\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F6CC" + "\\U0001F918-\\U0001F91E\\U0001F926\\U0001F930\\U0001F933-\\U0001F939\\U0001F93C-\\U0001F93E]"), status); fEModifierSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F3FB-\\U0001F3FF]"), status); fZWSSet = new UnicodeSet((UChar32)0x200D, (UChar32)0x200D);; - fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2764\\U0001F308\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8]"), status); + fGAZSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2695-\\u2696\\u2708\\u2764" + "\\U0001F308\\U0001F33E\\U0001F373\\U0001F393\\U0001F3A4\\U0001F3A8\\U0001F3EB\\U0001F3ED" + "\\U0001F466-\\U0001F469\\U0001F48B\\U0001F4BB-\\U0001F4BC\\U0001F527\\U0001F52C\\U0001F5E8" + "\\U0001F680\\U0001F692]"), status); fExtendSet->removeAll(*fZWSSet); @@ -3096,10 +3104,11 @@ RBBILineMonkey::RBBILineMonkey() : fSG = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status); fXX = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status); fEB = new UnicodeSet(UnicodeString( - "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443" - "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483" - "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647" - "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status); + "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C2-\\U0001F3C4\\U0001F3C7\\U0001F3CA-\\U0001F3CC" + "\\U0001F442-\\U0001F443\\U0001F446-\\U0001F450\\U0001F466-\\U0001F478\\U0001F47C" + "\\U0001F481-\\U0001F483\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F574-\\U0001F575\\U0001F57A\\U0001F590\\U0001F595-\\U0001F596" + "\\U0001F645-\\U0001F647\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F6CC" + "\\U0001F918-\\U0001F91E\\U0001F926\\U0001F930\\U0001F933-\\U0001F939\\U0001F93C-\\U0001F93E]"), status); fEM = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F3FB-\\U0001F3FF]"), status); fZJ = new UnicodeSet((UChar32)0x200D, (UChar32)0x200D); @@ -3119,9 +3128,13 @@ RBBILineMonkey::RBBILineMonkey() : fAL->removeAll(*fEM); + fAL->remove((UChar32)0x2695); // move u2695 from Al to Id + fAL->remove((UChar32)0x2696); // move u2696 from Al to Id fAL->remove((UChar32)0x2764); // Emoji Proposal: move u2764 from Al to Id fAI->remove((UChar32)0x2640); // new ZWJ seqs fAI->remove((UChar32)0x2642); // new ZWJ seqs + fID->add((UChar32)0x2695); + fID->add((UChar32)0x2696); fID->add((UChar32)0x2764); fID->add((UChar32)0x2640); fID->add((UChar32)0x2642);