]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/cintltst/cucdapi.c
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cucdapi.c
index 24a719d8153e76fb4df65f61b3f60f0e75168847..9b359c7a9222fea730eca0e990dffca3be55edbd 100644 (file)
@@ -1,5 +1,7 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /********************************************************************
 /********************************************************************
- * Copyright (c) 1997-2008, International Business Machines
+ * Copyright (c) 1997-2016, International Business Machines
  * Corporation and others. All Rights Reserved.
  ********************************************************************/
 
  * Corporation and others. All Rights Reserved.
  ********************************************************************/
 
@@ -9,8 +11,39 @@
 #include "unicode/uchar.h"
 #include "cintltst.h"
 #include "cucdapi.h"
 #include "unicode/uchar.h"
 #include "cintltst.h"
 #include "cucdapi.h"
+#include "cmemory.h"
 
 
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0]))
+static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
+    int32_t i;
+    if(length == 0) {
+        strcpy(s, "(no scripts)");
+        return;
+    }
+    s[0] = 0;
+    for(i = 0; i < length; ++i) {
+        if(i > 0) {
+            strcat(s, " ");
+        }
+        strcat(s, uscript_getShortName(scripts[i]));
+    }
+}
+
+static void assertEqualScripts(const char *msg,
+                               const UScriptCode scripts1[], int32_t length1,
+                               const UScriptCode scripts2[], int32_t length2,
+                               UErrorCode errorCode) {
+    char s1[80];
+    char s2[80];
+    if(U_FAILURE(errorCode)) {
+        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
+        return;
+    }
+    scriptsToString(scripts1, length1, s1);
+    scriptsToString(scripts2, length2, s2);
+    if(0!=strcmp(s1, s2)) {
+        log_data_err("Failed: %s: expected %s but got %s\n", msg, s1, s2);
+    }
+}
 
 void TestUScriptCodeAPI(){
     int i =0;
 
 void TestUScriptCodeAPI(){
     int i =0;
@@ -39,7 +72,7 @@ void TestUScriptCodeAPI(){
         "asfdasd", "5464", "12235",
         /* test the last index */
         "zyyy", "YI",
         "asfdasd", "5464", "12235",
         /* test the last index */
         "zyyy", "YI",
-        '\0'  
+        NULL  
         };
         UScriptCode expected[] ={
             /* locales should return */
         };
         UScriptCode expected[] ={
             /* locales should return */
@@ -68,11 +101,11 @@ void TestUScriptCodeAPI(){
 
         const int32_t capacity = 10;
 
 
         const int32_t capacity = 10;
 
-        for( ; testNames[i]!='\0'; i++){
+        for( ; testNames[i]!=NULL; i++){
             UScriptCode script[10]={USCRIPT_INVALID_CODE};
             uscript_getCode(testNames[i],script,capacity, &err);
             if( script[0] != expected[i]){
             UScriptCode script[10]={USCRIPT_INVALID_CODE};
             uscript_getCode(testNames[i],script,capacity, &err);
             if( script[0] != expected[i]){
-                   log_err("Error getting script code Got: %i  Expected: %i for name %s\n",
+                   log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
                        script[0],expected[i],testNames[i]);
                    numErrors++;
             }
                        script[0],expected[i],testNames[i]);
                    numErrors++;
             }
@@ -94,11 +127,11 @@ void TestUScriptCodeAPI(){
             err = U_ZERO_ERROR;
             capacity = 10;
             num = uscript_getCode("ja",script,capacity, &err);
             err = U_ZERO_ERROR;
             capacity = 10;
             num = uscript_getCode("ja",script,capacity, &err);
-            if(num!=(sizeof(jaCode)/sizeof(UScriptCode))){
+            if(num!=UPRV_LENGTHOF(jaCode)){
                 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
                 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
-                        num, (sizeof(jaCode)/sizeof(UScriptCode)));
+                        num, UPRV_LENGTHOF(jaCode));
             }
             }
-            for(j=0;j<sizeof(jaCode)/sizeof(UScriptCode);j++) {
+            for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
                 if(script[j]!=jaCode[j]) {
                     log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
                             script[j], uscript_getName(script[j]),
                 if(script[j]!=jaCode[j]) {
                     log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
                             script[j], uscript_getName(script[j]),
@@ -113,6 +146,50 @@ void TestUScriptCodeAPI(){
         }
 
     }
         }
 
     }
+    {
+        static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
+        static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
+        static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
+        static const UScriptCode HAN[1] = { USCRIPT_HAN };
+        static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
+        static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
+        static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
+        UScriptCode scripts[5];
+        UErrorCode err;
+        int32_t num;
+
+        // Should work regardless of whether we have locale data for the language.
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err);  // Tajik
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err);  // Sherpa
+
+        // Multi-script languages.
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("ja scripts: Kana Hira Hani",
+                           JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("ko scripts: Hang Hani",
+                           KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
+
+        // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
+        err = U_ZERO_ERROR;
+        num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
+        assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
+    }
 
     {
         UScriptCode testAbbr[]={
 
     {
         UScriptCode testAbbr[]={
@@ -126,10 +203,10 @@ void TestUScriptCodeAPI(){
             /* test names */
             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian", 
             "Gothic",  "Greek",  "Gujarati", 
             /* test names */
             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian", 
             "Gothic",  "Greek",  "Gujarati", 
-             '\0'
+             NULL
         };
         i=0;
         };
         i=0;
-        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
+        while(i<UPRV_LENGTHOF(testAbbr)){
             const char* name = uscript_getName(testAbbr[i]);
              if(name == NULL) {
                log_data_err("Couldn't get script name\n");
             const char* name = uscript_getName(testAbbr[i]);
              if(name == NULL) {
                log_data_err("Couldn't get script name\n");
@@ -165,10 +242,10 @@ void TestUScriptCodeAPI(){
             "Knda","Kana","Khmr","Laoo",
             "Latn",
             "Mlym", "Mong",
             "Knda","Kana","Khmr","Laoo",
             "Latn",
             "Mlym", "Mong",
-             '\0'
+             NULL
         };
         i=0;
         };
         i=0;
-        while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
+        while(i<UPRV_LENGTHOF(testAbbr)){
             const char* name = uscript_getShortName(testAbbr[i]);
             numErrors=0;
             if(strcmp(expectedAbbr[i],name)!=0){
             const char* name = uscript_getShortName(testAbbr[i]);
             numErrors=0;
             if(strcmp(expectedAbbr[i],name)!=0){
@@ -204,7 +281,7 @@ void TestUScriptCodeAPI(){
                 0x0001D1AA, /* USCRIPT_INHERITED*/
                 0x00020000, /* USCRIPT_HAN*/
                 0x00000D02, /* USCRIPT_MALAYALAM*/
                 0x0001D1AA, /* USCRIPT_INHERITED*/
                 0x00020000, /* USCRIPT_HAN*/
                 0x00000D02, /* USCRIPT_MALAYALAM*/
-                0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
+                0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
                 0x00000000, /* USCRIPT_COMMON*/
                 0x0001D169, /* USCRIPT_INHERITED*/
                 0x0001D182, /* USCRIPT_INHERITED*/
                 0x00000000, /* USCRIPT_COMMON*/
                 0x0001D169, /* USCRIPT_INHERITED*/
                 0x0001D182, /* USCRIPT_INHERITED*/
@@ -241,7 +318,7 @@ void TestUScriptCodeAPI(){
         UErrorCode status = U_ZERO_ERROR;
         UBool passed = TRUE;
 
         UErrorCode status = U_ZERO_ERROR;
         UBool passed = TRUE;
 
-        for(i=0; i<LENGTHOF(codepoints); ++i){
+        for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
             code = uscript_getScript(codepoints[i],&status);
             if(U_SUCCESS(status)){
                 if( code != expected[i] ||
             code = uscript_getScript(codepoints[i],&status);
             if(U_SUCCESS(status)){
                 if( code != expected[i] ||
@@ -293,7 +370,7 @@ void TestUScriptCodeAPI(){
             }
         }
     }
             }
         }
     }
-                
+
     {
         /*
          * These script codes were originally added to ICU pre-3.6, so that ICU would
     {
         /*
          * These script codes were originally added to ICU pre-3.6, so that ICU would
@@ -304,23 +381,50 @@ void TestUScriptCodeAPI(){
          * Whenever this happens, the long script names here need to be updated.
          */
         static const char* expectedLong[] = {
          * Whenever this happens, the long script names here need to be updated.
          */
         static const char* expectedLong[] = {
-            "Balinese", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", 
-            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kayah_Li", "Latf", "Latg", 
-            "Lepcha", "Lina", "Mand", "Maya", "Mero", "Nko", "Orkh", "Perm", "Phags_Pa", "Phoenician", 
-            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 
+            "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
+            "Egyd", "Egyh", "Egyptian_Hieroglyphs",
+            "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
+            "Javanese", "Kayah_Li", "Latf", "Latg",
+            "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
+            "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician", 
+            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 
             "Zxxx", "Unknown",
             "Zxxx", "Unknown",
-            "Carian", "Jpan", "Lana", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
-            "Moon", "Mtei",
+            "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
+            "Moon", "Meetei_Mayek",
             /* new in ICU 4.0 */
             /* new in ICU 4.0 */
-            "Armi", "Avst", "Cakm", "Kore",
-            "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
+            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
+            "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
+            "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
             "Zmth", "Zsym",
             "Zmth", "Zsym",
+            /* new in ICU 4.4 */
+            "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
+            /* new in ICU 4.6 */
+            "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
+            "Loma", "Mende_Kikakui", "Meroitic_Cursive",
+            "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
+            /* new in ICU 4.8 */
+            "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
+            /* new in ICU 49 */
+            "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
+            /* new in ICU 52 */
+            "Caucasian_Albanian", "Mahajani",
+            /* new in ICU 54 */
+            "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
+            // new in ICU 58
+            "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
+            // new in ICU 60
+            "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
+            // new in ICU 61
+            "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
+            "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
+            // new in ICU 64
+            "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
         };
         static const char* expectedShort[] = {
         };
         static const char* expectedShort[] = {
-            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", 
-            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg", 
-            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx", 
-            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux", 
+            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
+            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
+            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
+            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
             "Zxxx", "Zzzz",
             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
             "Moon", "Mtei",
             "Zxxx", "Zzzz",
             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
             "Moon", "Mtei",
@@ -328,8 +432,33 @@ void TestUScriptCodeAPI(){
             "Armi", "Avst", "Cakm", "Kore",
             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
             "Zmth", "Zsym",
             "Armi", "Avst", "Cakm", "Kore",
             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
             "Zmth", "Zsym",
+            /* new in ICU 4.4 */
+            "Bamu", "Lisu", "Nkgb", "Sarb",
+            /* new in ICU 4.6 */
+            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
+            "Narb", "Nbat", "Palm", "Sind", "Wara",
+            /* new in ICU 4.8 */
+            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
+            /* new in ICU 49 */
+            "Hluw", "Khoj", "Tirh",
+            /* new in ICU 52 */
+            "Aghb", "Mahj",
+            /* new in ICU 54 */
+            "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
+            // new in ICU 58
+            "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
+            // new in ICU 60
+            "Gonm", "Soyo", "Zanb",
+            // new in ICU 61
+            "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
+            // new in ICU 64
+            "Elym", "Hmnp", "Nand", "Wcho",
         };
         int32_t j = 0;
         };
         int32_t j = 0;
+        if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
+            log_err("need to add new script codes in cucdapi.c!\n");
+            return;
+        }
         for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
             const char* name = uscript_getName((UScriptCode)i);
             if(name==NULL || strcmp(name,expectedLong[j])!=0){
         for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
             const char* name = uscript_getName((UScriptCode)i);
             if(name==NULL || strcmp(name,expectedLong[j])!=0){
@@ -340,11 +469,11 @@ void TestUScriptCodeAPI(){
                 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
             }
         }
                 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
             }
         }
-        for(i=0; i<LENGTHOF(expectedLong); i++){
+        for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
             UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
             UErrorCode status = U_ZERO_ERROR;
             int32_t len = 0;
             UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
             UErrorCode status = U_ZERO_ERROR;
             int32_t len = 0;
-            len = uscript_getCode(expectedShort[i], fillIn, LENGTHOF(fillIn), &status);
+            len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
             if(U_FAILURE(status)){
                 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
             }
             if(U_FAILURE(status)){
                 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
             }
@@ -356,6 +485,230 @@ void TestUScriptCodeAPI(){
             }
         }
     }
             }
         }
     }
+
+    {
+        /* test characters which have Script_Extensions */
+        UErrorCode errorCode=U_ZERO_ERROR;
+        if(!(
+                USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
+                USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
+                USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
+            U_FAILURE(errorCode)
+        ) {
+            log_err("uscript_getScript(character with Script_Extensions) failed\n");
+        }
+    }
+}
+
+void TestHasScript() {
+    if(!(
+        !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
+        uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
+        !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
+        !uscript_hasScript(0x063f, USCRIPT_THAANA))
+    ) {
+        log_err("uscript_hasScript(U+063F, ...) is wrong\n");
+    }
+    if(!(
+        !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
+        uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
+        uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
+        !uscript_hasScript(0x0640, USCRIPT_THAANA))
+    ) {
+        log_err("uscript_hasScript(U+0640, ...) is wrong\n");
+    }
+    if(!(
+        !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
+        uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
+        uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
+        !uscript_hasScript(0x0650, USCRIPT_THAANA))
+    ) {
+        log_err("uscript_hasScript(U+0650, ...) is wrong\n");
+    }
+    if(!(
+        !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
+        uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
+        !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
+        uscript_hasScript(0x0660, USCRIPT_THAANA))
+    ) {
+        log_err("uscript_hasScript(U+0660, ...) is wrong\n");
+    }
+    if(!(
+        !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
+        uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
+        !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
+        uscript_hasScript(0xfdf2, USCRIPT_THAANA))
+    ) {
+        log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
+    }
+    if(uscript_hasScript(0x0640, 0xaffe)) {
+        /* An unguarded implementation might go into an infinite loop. */
+        log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
+    }
+}
+
+static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
+    UBool contain=FALSE;
+    int32_t prev=-1, i;
+    for(i=0; i<length; ++i) {
+        int32_t s=scripts[i];
+        if(s<=prev) {
+            log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
+        }
+        if(s==script) { contain=TRUE; }
+    }
+    return contain;
+}
+
+void TestGetScriptExtensions() {
+    UScriptCode scripts[20];
+    int32_t length;
+    UErrorCode errorCode;
+
+    /* errors and overflows */
+    errorCode=U_PARSE_ERROR;
+    length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
+    if(errorCode!=U_PARSE_ERROR) {
+        log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
+              u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
+    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
+              u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
+    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
+              u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
+    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
+        log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
+    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
+        log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+    /* U+063F has only a Script code, no Script_Extensions. */
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
+    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
+        log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+
+    /* invalid code points */
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
+    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
+        log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
+    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
+        log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+
+    /* normal usage */
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
+    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
+        log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
+    if(U_FAILURE(errorCode) || length<3 ||
+            !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
+            !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
+            !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
+        log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
+    if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
+        log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
+    if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
+        log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
+              (int)length, u_errorName(errorCode));
+    }
+}
+
+void TestScriptMetadataAPI() {
+    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
+    UErrorCode errorCode=U_ZERO_ERROR;
+    UChar sample[8];
+
+    if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
+            U_FAILURE(errorCode) ||
+            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
+            sample[1]!=0) {
+        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
+    }
+    sample[0]=0xfffe;
+    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
+            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
+            sample[0]!=0xfffe) {
+        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
+    }
+    errorCode=U_ZERO_ERROR;
+    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
+            U_FAILURE(errorCode) ||
+            sample[0]!=0) {
+        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
+    }
+    sample[0]=0xfffe;
+    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
+            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
+            sample[0]!=0xfffe) {
+        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
+    }
+
+    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
+            // Unicode 10 gives up on "aspirational".
+            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
+            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
+            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
+            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
+            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
+            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
+        log_err("uscript_getUsage() failed\n");
+    }
+
+    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
+            uscript_isRightToLeft(USCRIPT_CIRTH) ||
+            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
+            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
+        log_err("uscript_isRightToLeft() failed\n");
+    }
+
+    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
+            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
+            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
+            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
+        log_err("uscript_breaksBetweenLetters() failed\n");
+    }
+
+    if(uscript_isCased(USCRIPT_CIRTH) ||
+            uscript_isCased(USCRIPT_HAN) ||
+            !uscript_isCased(USCRIPT_LATIN) ||
+            !uscript_isCased(USCRIPT_GREEK)) {
+        log_err("uscript_isCased() failed\n");
+    }
 }
 
 void TestBinaryValues() {
 }
 
 void TestBinaryValues() {
@@ -366,14 +719,14 @@ void TestBinaryValues() {
     static const char *const falseValues[]={ "N", "No", "F", "False" };
     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
     int32_t i;
     static const char *const falseValues[]={ "N", "No", "F", "False" };
     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
     int32_t i;
-    for(i=0; i<LENGTHOF(falseValues); ++i) {
+    for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
         if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
         if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
-            log_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE\n", falseValues[i]);
+            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
         }
     }
         }
     }
-    for(i=0; i<LENGTHOF(trueValues); ++i) {
+    for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
         if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
         if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
-            log_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE\n", trueValues[i]);
+            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
         }
     }
 }
         }
     }
 }