]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/intltest/rbbitst.cpp
ICU-57149.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbitst.cpp
index 9f5851478a024d5ddd4b97d9056a5cf1bddc8ad1..d33837a84c94c4268dbbb521d3a9dedeb63542a2 100644 (file)
@@ -2065,14 +2065,18 @@ RBBICharMonkey::RBBICharMonkey() {
 
 
     fEmojiBaseSet = new UnicodeSet(UnicodeString(
-                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
-                "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
-                "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
-                "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status);
+                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C2-\\U0001F3C4\\U0001F3C7\\U0001F3CA-\\U0001F3CC"
+                "\\U0001F442-\\U0001F443\\U0001F446-\\U0001F450\\U0001F466-\\U0001F478\\U0001F47C"
+                "\\U0001F481-\\U0001F483\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F574-\\U0001F575\\U0001F57A\\U0001F590\\U0001F595-\\U0001F596"
+                "\\U0001F645-\\U0001F647\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F6CC"
+                "\\U0001F918-\\U0001F91E\\U0001F926\\U0001F930\\U0001F933-\\U0001F939\\U0001F93C-\\U0001F93E]"), status);
 
     fEmojiModifierSet = new UnicodeSet(0x0001F3FB, 0x0001F3FF);
     fZWJSet           = new UnicodeSet(0x200D, 0x200D);
-    fGAZSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2764\\U0001F308\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8]"), status);
+    fGAZSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2695-\\u2696\\u2708\\u2764"
+                                        "\\U0001F308\\U0001F33E\\U0001F373\\U0001F393\\U0001F3A4\\U0001F3A8\\U0001F3EB\\U0001F3ED"
+                                        "\\U0001F466-\\U0001F469\\U0001F48B\\U0001F4BB-\\U0001F4BC\\U0001F527\\U0001F52C\\U0001F5E8"
+                                        "\\U0001F680\\U0001F692]"), status);
 
     fSets       = new UVector(status);
     fSets->addElement(fCRLFSet,    status);
@@ -2354,14 +2358,18 @@ RBBIWordMonkey::RBBIWordMonkey()
     fExtendSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Word_Break = Extend}]"),       status);
 
     fEBaseSet         = new UnicodeSet(UnicodeString(
-                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
-                "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
-                "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
-                "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status);
+                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C2-\\U0001F3C4\\U0001F3C7\\U0001F3CA-\\U0001F3CC"
+                "\\U0001F442-\\U0001F443\\U0001F446-\\U0001F450\\U0001F466-\\U0001F478\\U0001F47C"
+                "\\U0001F481-\\U0001F483\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F574-\\U0001F575\\U0001F57A\\U0001F590\\U0001F595-\\U0001F596"
+                "\\U0001F645-\\U0001F647\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F6CC"
+                "\\U0001F918-\\U0001F91E\\U0001F926\\U0001F930\\U0001F933-\\U0001F939\\U0001F93C-\\U0001F93E]"), status);
 
     fEModifierSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F3FB-\\U0001F3FF]"), status);
     fZWSSet          = new UnicodeSet((UChar32)0x200D, (UChar32)0x200D);;
-    fGAZSet          = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2764\\U0001F308\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8]"), status);
+    fGAZSet          = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\u2640\\u2642\\u2695-\\u2696\\u2708\\u2764"
+                                        "\\U0001F308\\U0001F33E\\U0001F373\\U0001F393\\U0001F3A4\\U0001F3A8\\U0001F3EB\\U0001F3ED"
+                                        "\\U0001F466-\\U0001F469\\U0001F48B\\U0001F4BB-\\U0001F4BC\\U0001F527\\U0001F52C\\U0001F5E8"
+                                        "\\U0001F680\\U0001F692]"), status);
     fExtendSet->removeAll(*fZWSSet);
 
 
@@ -3096,10 +3104,11 @@ RBBILineMonkey::RBBILineMonkey() :
     fSG    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\ud800-\\udfff]"), status);
     fXX    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Line_break=XX}]"), status);
     fEB    = new UnicodeSet(UnicodeString(
-                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
-                "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
-                "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
-                "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]"), status);
+                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C2-\\U0001F3C4\\U0001F3C7\\U0001F3CA-\\U0001F3CC"
+                "\\U0001F442-\\U0001F443\\U0001F446-\\U0001F450\\U0001F466-\\U0001F478\\U0001F47C"
+                "\\U0001F481-\\U0001F483\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F574-\\U0001F575\\U0001F57A\\U0001F590\\U0001F595-\\U0001F596"
+                "\\U0001F645-\\U0001F647\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F6CC"
+                "\\U0001F918-\\U0001F91E\\U0001F926\\U0001F930\\U0001F933-\\U0001F939\\U0001F93C-\\U0001F93E]"), status);
     fEM    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\U0001F3FB-\\U0001F3FF]"), status);
     fZJ    = new UnicodeSet((UChar32)0x200D, (UChar32)0x200D);
 
@@ -3119,9 +3128,13 @@ RBBILineMonkey::RBBILineMonkey() :
     fAL->removeAll(*fEM);
 
 
+    fAL->remove((UChar32)0x2695);   // move u2695 from Al to Id
+    fAL->remove((UChar32)0x2696);   // move u2696 from Al to Id
     fAL->remove((UChar32)0x2764);   // Emoji Proposal: move u2764 from Al to Id
     fAI->remove((UChar32)0x2640);   // new ZWJ seqs
     fAI->remove((UChar32)0x2642);   // new ZWJ seqs
+    fID->add((UChar32)0x2695);
+    fID->add((UChar32)0x2696);
     fID->add((UChar32)0x2764);
     fID->add((UChar32)0x2640);
     fID->add((UChar32)0x2642);