]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/cintltst/custrtst.c
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / custrtst.c
index 13425dbfcd4a21667da3fdac538f3b02b54969ae..1c116ea6d87d657f0db9b31e61ee82e63c15e42d 100644 (file)
@@ -1,3 +1,5 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 ******************************************************************************
 *
@@ -6,7 +8,7 @@
 *
 ******************************************************************************
 *   file name:  custrtst.c
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
@@ -36,6 +38,7 @@ static void TestSurrogateSearching(void);
 static void TestUnescape(void);
 static void TestCountChar32(void);
 static void TestUCharIterator(void);
+static void TestIsWellFormed(void);
 
 void addUStringTest(TestNode** root);
 
@@ -48,6 +51,7 @@ void addUStringTest(TestNode** root)
     addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
     addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
     addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
+    addTest(root, &TestIsWellFormed, "tsutil/custrtst/TestIsWellFormed");
 }
 
 /* test data for TestStringFunctions ---------------------------------------- */
@@ -1509,3 +1513,127 @@ TestUCharIterator() {
 
     /* ### TODO test other iterators: CharacterIterator, Replaceable */
 }
+
+static const UChar valid0[] = { 0 }; // empty string: only the NUL terminator
+static const UChar valid1[] = { 0x0061,0x270C,0xFE0E, // victory hand (U+270C) with text variation selector (U+FE0E)
+                                0x0062,0x270C,0xFE0F, // victory hand with emoji variation selector (U+FE0F)
+                                0x0063,0x270C,0xD83C,0xDFFD, // victory hand with skin tone modifier (U+1F3FD)
+                                0x0064,0x270C,0xFE0F,0xD83C,0xDFFD, // victory hand with emoji variation selector and skin tone modifier (obsolete sequence)
+                                0x0065,0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83C,0xDFEB, // woman teacher (ZWJ sequence) with skin tone
+                                0x0066,0xD83D,0xDC69,0x200D,0xD83D,0xDC69,0x200D,0xD83D,0xDC67, // family (woman, woman, girl - ZWJ sequence)
+                                0x0067,0x0030,0xFE0F,0x20E3, // keypad 0 (emoji keycap sequence: digit, U+FE0F, U+20E3)
+                                0x0068,0xD83C,0xDDEC,0xD83C,0xDDE7, // flag of UK (regional indicator pair)
+                                0x0069,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0xDB40,0xDC7F, // flag of England (tag sequence ending in cancel tag U+E007F)
+                                0x006A,0 }; // ASCII letters a..j separate the cases; final 0 is the NUL terminator
+static const UChar valid2[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
+                                0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 29 combining marks (U+0300) on one base letter; expected well-formed
+                                0x006C,0 }; // following base letter, then NUL terminator
+static const UChar valid3[] = { // sample from Bill Siegrist, 100 UTF-16 code units (not counting the terminator)
+                                0xD83D,0xDC2E,                                    // U+1F42E   🐮
+                                0xD83D,0xDC3C,                                    // U+1F43C   🐼
+                                0xD83D,0xDC39,                                    // U+1F439   🐹
+                                0xD83D,0xDC31,                                    // U+1F431   🐱
+                                0xD83D,0xDE4B,0x200D,0x2640,0xFE0F,               // U+1F64B  U+200D U+2640  U+FE0F    🙋‍♀️
+                                0xD83D,0xDE47,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F647 U+1F3FC U+200D  U+2642 U+FE0F     🙇🏼‍♂️
+                                0xD83D,0xDE46,0x200D,0x2642,0xFE0F,               // U+1F646  U+200D U+2642  U+FE0F    🙆‍♂️
+                                0xD83E,0xDDDA,0xD83C,0xDFFF,0x200D,0x2640,0xFE0F, // U+1F9DA U+1F3FF U+200D  U+2640 U+FE0F 🧚🏿‍♀️
+                                0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D  U+2642 U+FE0F 🧖🏾‍♂️
+                                0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D  U+2642 U+FE0F     🧖🏾‍♂️
+                                0xD83E,0xDDDB,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F9DB U+1F3FC U+200D  U+2642 U+FE0F     🧛🏼‍♂️
+                                0xD83E,0xDDD9,0x200D,0x2640,0xFE0F,               // U+1F9D9  U+200D U+2640  U+FE0F    🧙‍♀️
+                                0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0x2696,0xFE0F, // U+1F468 U+1F3FE U+200D  U+2696 U+FE0F     👨🏾‍⚖️
+                                0xD83D,0xDC69,0xD83C,0xDFFC,0x200D,0xD83D,0xDD27, // U+1F469 U+1F3FC U+200D U+1F527    👩🏼‍🔧
+                                0xD83D,0xDC69,0xD83C,0xDFFF,0x200D,0xD83C,0xDFEB, // U+1F469 U+1F3FF U+200D U+1F3EB    👩🏿‍🏫
+                                0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0xD83D,0xDCBB, // U+1F468 U+1F3FE U+200D U+1F4BB    👨🏾‍💻
+                                0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83D,0xDD2C, // U+1F469 U+1F3FD U+200D U+1F52C    👩🏽‍🔬
+                                0xD83D,0xDC68,0xD83C,0xDFFC,0x200D,0xD83D,0xDE92, // U+1F468 U+1F3FC U+200D U+1F692    👨🏼‍🚒
+                                0 }; // NUL terminator
+static const UChar valid4[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // 8 x U+202A (LRE), to level 16
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
+                                0x0061,0x2066,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x2069,                             // U+2066 (LRI) + 4 x LRE to level 122, then U+2069 (PDI) pops back to 112
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,                                    // to level 122 again
+                                0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // 16 x U+202C (PDF), pop to level 90
+                                0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 58
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 74
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 90
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 106
+                                0x000A,                                                                                                          // line feed: pop to level 0
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
+                                0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
+                                0x000A,0 }; // final line feed, then NUL terminator
+
+static const UChar malformed1[] = { 0x0061,0xFFFF,0 }; // noncharacter U+FFFF (BMP)
+static const UChar malformed2[] = { 0x0062,0xD87F,0xDFFE,0 }; // noncharacter U+2FFFE (supplementary plane)
+static const UChar malformed3[] = { 0x0063,0xD7FC,0 }; // unassigned code point U+D7FC
+static const UChar malformed4[] = { 0x0064,0xD800,0 }; // unpaired high surrogate
+static const UChar malformed5[] = { 0x0065,0xDC00,0 }; // unpaired low surrogate
+static const UChar malformed6[] = { 0x0066,0xFE0F,0 }; // emoji variation selector on non-emoji (letter f)
+static const UChar malformed7[] = { 0x0067,0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence on non-emoji (letter g)
+static const UChar malformed8[] = { 0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence with no base
+static const UChar malformed9[] = { 0x0068,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0x0069,0 }; // tag sequence with no terminating cancel tag (U+E007F)
+static const UChar malformedA[] = { 0x006A,0xD83C,0xDFF4,0xDB40,0xDC7F,0 }; // tag sequence with no tag_spec, just the terminating cancel tag
+static const UChar malformedB[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
+                                    0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 31 combining marks (U+0300) on one base letter; expected malformed
+                                    0x006C,0 }; // following base letter, then NUL terminator
+static const UChar malformedC[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // 8 x U+202A (LRE), to level 16
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
+                                    0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 128 (error)
+                                    0x0061,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0 };    // U+202C (PDF) run starts only after the overflow, too late; then NUL terminator
+
+typedef struct { // one test case for TestIsWellFormed
+    const char* descrip; // label printed in failure messages; NULL marks the end of the table
+    const UChar* string; // NUL-terminated UTF-16 input handed to u_strIsWellFormed
+    UBool result; // expected return value of u_strIsWellFormed for this input
+} StringAndResult;
+
+static const StringAndResult wellFormedTests[] = { // cases walked by TestIsWellFormed, in order
+    { "valid0",     valid0,     TRUE },
+    { "valid1",     valid1,     TRUE },
+    { "valid2",     valid2,     TRUE },
+    { "valid3",     valid3,     TRUE },
+    { "valid4",     valid4,     TRUE },
+    { "malformed1", malformed1, FALSE },
+    { "malformed2", malformed2, FALSE },
+    { "malformed3", malformed3, FALSE },
+    { "malformed4", malformed4, FALSE },
+    { "malformed5", malformed5, FALSE },
+    { "malformed6", malformed6, FALSE },
+    { "malformed7", malformed7, FALSE },
+    { "malformed8", malformed8, FALSE },
+    { "malformed9", malformed9, FALSE },
+    { "malformedA", malformedA, FALSE },
+    { "malformedB", malformedB, FALSE },
+    { "malformedC", malformedC, FALSE },
+    { NULL, NULL, 0 } // sentinel: the NULL descrip stops the loop in TestIsWellFormed
+};
+
+static void
+TestIsWellFormed() { // checks u_strIsWellFormed against every entry of wellFormedTests
+    const StringAndResult* testPtr;
+    for (testPtr = wellFormedTests; testPtr->descrip != NULL; testPtr++) { // walk the table up to the NULL-descrip sentinel
+        UBool result = u_strIsWellFormed(testPtr->string, -1); // pass 1: length -1 (presumably "NUL-terminated", per ICU convention)
+        if (result != testPtr->result) {
+            log_err("test %s with length -1, expected %d, got %d\n", testPtr->descrip, testPtr->result, result);
+        }
+
+        int32_t length = u_strlen(testPtr->string); // pass 2: same string, explicit length from u_strlen
+        result = u_strIsWellFormed(testPtr->string, length);
+        if (result != testPtr->result) { // both passes are expected to give the same answer
+            log_err("test %s with length %d, expected %d, got %d\n", testPtr->descrip, length, testPtr->result, result);
+        }
+    }
+}