/********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1997-2003, International Business Machines Corporation and
- * others. All Rights Reserved.
+ * Copyright (c) 1997-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
********************************************************************/
#include <string.h>
#include "cintltst.h"
#include "cucdapi.h"
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0])) /* element count of a real array (not a pointer), converted to int32_t */
+
void TestUScriptCodeAPI(){
int i =0;
int numErrors =0;
UScriptCode script[10]={USCRIPT_INVALID_CODE};
uscript_getCode(testNames[i],script,capacity, &err);
if( script[0] != expected[i]){
- log_err("Error getting script code Got: %i Expected: %i for name %s\n",
+ log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
script[0],expected[i],testNames[i]);
numErrors++;
}
{
UErrorCode err = U_ZERO_ERROR;
int32_t capacity=0;
- UScriptCode jaCode[]={ USCRIPT_KATAKANA,USCRIPT_HIRAGANA,USCRIPT_HAN};
+ int32_t j;
+ UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
UScriptCode script[10]={USCRIPT_INVALID_CODE};
int32_t num = uscript_getCode("ja",script,capacity, &err);
/* preflight */
err = U_ZERO_ERROR;
capacity = 10;
num = uscript_getCode("ja",script,capacity, &err);
- if(num!=(sizeof(jaCode)/sizeof(UScriptCode)) || script[0]!=jaCode[0] || script[1]!=jaCode[1]){
- log_err("Errors uscript_getScriptCode() for Japaneese locale \n");
+        /*
+         * LENGTHOF() yields an int32_t, so num and j are compared signed-to-signed
+         * and the expected count is passed to %d as an int32_t rather than as the
+         * size_t that a bare sizeof expression would produce.
+         */
+        if(num!=LENGTHOF(jaCode)){
+            log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
+                    num, LENGTHOF(jaCode));
+        }
+        /* Check each expected code separately so the message names the entry that differs. */
+        for(j=0;j<LENGTHOF(jaCode);j++) {
+            if(script[j]!=jaCode[j]) {
+                /* uscript_getName() results are NULL-checked elsewhere in this file; never hand NULL to %s. */
+                const char* gotName = uscript_getName(script[j]);
+                const char* wantName = uscript_getName(jaCode[j]);
+                log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
+                        script[j], gotName!=NULL ? gotName : "(null)",
+                        jaCode[j], wantName!=NULL ? wantName : "(null)");
+            }
}
}else{
log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
}
/* now test uscript_getScript() API */
{
-#define MAX_ARRAY_SIZE 23
uint32_t codepoints[] = {
0x0000FF9D, /* USCRIPT_KATAKANA*/
0x0000FFBE, /* USCRIPT_HANGUL*/
0x0001D1AA, /* USCRIPT_INHERITED*/
0x00020000, /* USCRIPT_HAN*/
0x00000D02, /* USCRIPT_MALAYALAM*/
- 0x00000D00, /* USCRIPT_COMMON */
+ 0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
0x00000000, /* USCRIPT_COMMON*/
0x0001D169, /* USCRIPT_INHERITED*/
0x0001D182, /* USCRIPT_INHERITED*/
0x0001D18B, /* USCRIPT_INHERITED*/
0x0001D1AD, /* USCRIPT_INHERITED*/
- 0x00110000, /* USCRIPT_INVALID_CODE */
};
UScriptCode expected[] = {
USCRIPT_INHERITED,
USCRIPT_HAN ,
USCRIPT_MALAYALAM,
- USCRIPT_COMMON,
+ USCRIPT_UNKNOWN,
USCRIPT_COMMON,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
USCRIPT_INHERITED ,
- USCRIPT_INVALID_CODE,
};
UScriptCode code = USCRIPT_INVALID_CODE;
UErrorCode status = U_ZERO_ERROR;
UBool passed = TRUE;
- i =0;
- while(i< MAX_ARRAY_SIZE){
+ for(i=0; i<LENGTHOF(codepoints); ++i){
code = uscript_getScript(codepoints[i],&status);
if(U_SUCCESS(status)){
if( code != expected[i] ||
codepoints[i],u_errorName(status));
break;
}
- i++;
}
if(passed==FALSE){
for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
const char* name = uscript_getName((UScriptCode)i);
if(name==NULL || strcmp(name,"")==0){
- log_err("uscript_getName failed for code : %i\n",i);
+ log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
+ }
+ }
+ }
+
+    {
+        /*
+         * These script codes were originally added to ICU pre-3.6, so that ICU would
+         * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
+         * These script codes were added with only short names because we don't
+         * want to invent long names ourselves.
+         * Unicode 5 and later encode some of these scripts and give them long names.
+         * Whenever this happens, the long script names here need to be updated.
+         *
+         * Both tables are indexed by (script code - USCRIPT_BALINESE) and must
+         * therefore have the same number of entries.
+         */
+        static const char* expectedLong[] = {
+            "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
+            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
+            "Lepcha", "Lina", "Mandaic", "Maya", "Meroitic_Hieroglyphs", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
+            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
+            "Zxxx", "Unknown",
+            "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
+            "Moon", "Meetei_Mayek",
+            /* new in ICU 4.0 */
+            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
+            "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
+            "Zmth", "Zsym",
+            /* new in ICU 4.4 */
+            "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
+            /* new in ICU 4.6 */
+            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Meroitic_Cursive",
+            "Narb", "Nbat", "Palm", "Sind", "Wara",
+            /* new in ICU 4.8 */
+            "Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
+            /* new in ICU 49 */
+            "Hluw", "Khoj", "Tirh",
+        };
+        static const char* expectedShort[] = {
+            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
+            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
+            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
+            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
+            "Zxxx", "Zzzz",
+            "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
+            "Moon", "Mtei",
+            /* new in ICU 4.0 */
+            "Armi", "Avst", "Cakm", "Kore",
+            "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
+            "Zmth", "Zsym",
+            /* new in ICU 4.4 */
+            "Bamu", "Lisu", "Nkgb", "Sarb",
+            /* new in ICU 4.6 */
+            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
+            "Narb", "Nbat", "Palm", "Sind", "Wara",
+            /* new in ICU 4.8 */
+            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
+            /* new in ICU 49 */
+            "Hluw", "Khoj", "Tirh",
+        };
+        int32_t j = 0;
+        /*
+         * Bail out before indexing if either table is out of step with the enum:
+         * the loops below read both tables at the same index.
+         */
+        if(LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE) ||
+                LENGTHOF(expectedShort)!=LENGTHOF(expectedLong)) {
+            log_err("need to add new script codes in cucdapi.c!\n");
+            return;
+        }
+        /* Code -> name: walk the enum range and compare against both tables. */
+        for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
+            const char* name = uscript_getName((UScriptCode)i);
+            if(name==NULL || strcmp(name,expectedLong[j])!=0){
+                /* name may be NULL in this branch; substitute a printable placeholder for %s. */
+                log_err("uscript_getName failed for code %i: %s!=%s\n", i, name!=NULL ? name : "(null)", expectedLong[j]);
+            }
+            name = uscript_getShortName((UScriptCode)i);
+            if(name==NULL || strcmp(name,expectedShort[j])!=0){
+                log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name!=NULL ? name : "(null)", expectedShort[j]);
+            }
+        }
+        /* Name -> code: each short name must map back to the code at the same table position. */
+        for(i=0; i<LENGTHOF(expectedLong); i++){
+            UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
+            UErrorCode status = U_ZERO_ERROR;
+            int32_t len = 0;
+            len = uscript_getCode(expectedShort[i], fillIn, LENGTHOF(fillIn), &status);
+            if(U_FAILURE(status)){
+                log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
+            }
+            /* Only a surplus of codes is reported here; a missing code leaves fillIn[0] at its initial value and is caught by the next check. */
+            if(len>1){
+                log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
}
+            if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
+                log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
+            }
+        }
+    }
+
+    {
+        /*
+         * Characters which have Script_Extensions: uscript_getScript() is expected
+         * to return COMMON for U+0640, INHERITED for U+0650 and ARABIC for U+FDF2.
+         * The lookups run in that order and stop at the first mismatch.
+         */
+        UErrorCode ec=U_ZERO_ERROR;
+        UBool allMatch=FALSE;
+        if(USCRIPT_COMMON==uscript_getScript(0x0640, &ec)) {
+            if(USCRIPT_INHERITED==uscript_getScript(0x0650, &ec)) {
+                if(USCRIPT_ARABIC==uscript_getScript(0xfdf2, &ec)) {
+                    allMatch=TRUE;
+                }
+            }
+        }
+        /* A failure code is only consulted when every lookup matched. */
+        if(!allMatch || U_FAILURE(ec)) {
+            log_err("uscript_getScript(character with Script_Extensions) failed\n");
}
}
-
+}
+
+/*
+ * Checks uscript_hasScript() for a handful of code points against the four
+ * scripts COMMON/INHERITED, ARABIC, SYRIAC and THAANA. Each check logs one
+ * error as soon as any single answer differs from the expectation.
+ */
+void TestHasScript() {
+    /* U+063F: ARABIC (its main Script value) only. */
+    if(uscript_hasScript(0x063f, USCRIPT_COMMON) ||
+            !uscript_hasScript(0x063f, USCRIPT_ARABIC) ||
+            uscript_hasScript(0x063f, USCRIPT_SYRIAC) ||
+            uscript_hasScript(0x063f, USCRIPT_THAANA)) {
+        log_err("uscript_hasScript(U+063F, ...) is wrong\n");
+    }
+    /* U+0640: not COMMON although that is its main Script value; ARABIC and SYRIAC, not THAANA. */
+    if(uscript_hasScript(0x0640, USCRIPT_COMMON) ||
+            !uscript_hasScript(0x0640, USCRIPT_ARABIC) ||
+            !uscript_hasScript(0x0640, USCRIPT_SYRIAC) ||
+            uscript_hasScript(0x0640, USCRIPT_THAANA)) {
+        log_err("uscript_hasScript(U+0640, ...) is wrong\n");
+    }
+    /* U+0650: not INHERITED although that is its main Script value; ARABIC and SYRIAC, not THAANA. */
+    if(uscript_hasScript(0x0650, USCRIPT_INHERITED) ||
+            !uscript_hasScript(0x0650, USCRIPT_ARABIC) ||
+            !uscript_hasScript(0x0650, USCRIPT_SYRIAC) ||
+            uscript_hasScript(0x0650, USCRIPT_THAANA)) {
+        log_err("uscript_hasScript(U+0650, ...) is wrong\n");
+    }
+    /* U+0660: not COMMON although that is its main Script value; ARABIC and THAANA, not SYRIAC. */
+    if(uscript_hasScript(0x0660, USCRIPT_COMMON) ||
+            !uscript_hasScript(0x0660, USCRIPT_ARABIC) ||
+            uscript_hasScript(0x0660, USCRIPT_SYRIAC) ||
+            !uscript_hasScript(0x0660, USCRIPT_THAANA)) {
+        log_err("uscript_hasScript(U+0660, ...) is wrong\n");
+    }
+    /* U+FDF2: ARABIC (its main Script value) and THAANA; neither COMMON nor SYRIAC. */
+    if(uscript_hasScript(0xfdf2, USCRIPT_COMMON) ||
+            !uscript_hasScript(0xfdf2, USCRIPT_ARABIC) ||
+            uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) ||
+            !uscript_hasScript(0xfdf2, USCRIPT_THAANA)) {
+        log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
+    }
+    /* A bogus script code must simply be rejected; an unguarded implementation might go into an infinite loop. */
+    if(uscript_hasScript(0x0640, 0xaffe)) {
+        log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
+    }
+}
+
+/*
+ * Exercises uscript_getScriptExtensions(): error-code handling, argument
+ * validation, preflighting with too-small buffers, out-of-range code points,
+ * and regular lookups. Every mismatch is reported through log_err().
+ */
+void TestGetScriptExtensions() {
+    UScriptCode codes[20];
+    /* Start with a pre-set failure: the call must leave it untouched. */
+    UErrorCode status=U_PARSE_ERROR;
+    int32_t count=uscript_getScriptExtensions(0x0640, codes, LENGTHOF(codes), &status);
+    if(status!=U_PARSE_ERROR) {
+        log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
+                u_errorName(status));
+    }
+
+    /* Bad arguments: NULL output buffer, then a negative capacity. */
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x0640, NULL, LENGTHOF(codes), &status);
+    if(status!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
+                u_errorName(status));
+    }
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x0640, codes, -1, &status);
+    if(status!=U_ILLEGAL_ARGUMENT_ERROR) {
+        log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
+                u_errorName(status));
+    }
+
+    /* Preflighting: a too-small buffer must report overflow together with the full count. */
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x0640, codes, 0, &status);
+    if(!(status==U_BUFFER_OVERFLOW_ERROR && count==3)) {
+        log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d != 3 - %s\n",
+                (int)count, u_errorName(status));
+    }
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x0640, codes, 1, &status);
+    if(!(status==U_BUFFER_OVERFLOW_ERROR && count==3)) {
+        log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d != 3 - %s\n",
+                (int)count, u_errorName(status));
+    }
+    /* U+063F carries just a Script code and no Script_Extensions, so one entry is expected. */
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x063f, codes, 0, &status);
+    if(!(status==U_BUFFER_OVERFLOW_ERROR && count==1)) {
+        log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
+                (int)count, u_errorName(status));
+    }
+
+    /* Code points outside the valid range: expect success with the single entry UNKNOWN. */
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(-1, codes, LENGTHOF(codes), &status);
+    if(!(U_SUCCESS(status) && count==1 && codes[0]==USCRIPT_UNKNOWN)) {
+        log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
+                (int)count, u_errorName(status));
+    }
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x110000, codes, LENGTHOF(codes), &status);
+    if(!(U_SUCCESS(status) && count==1 && codes[0]==USCRIPT_UNKNOWN)) {
+        log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
+                (int)count, u_errorName(status));
+    }
+
+    /* Regular lookups with a buffer that is large enough. */
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x063f, codes, 1, &status);
+    if(!(U_SUCCESS(status) && count==1 && codes[0]==USCRIPT_ARABIC)) {
+        log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
+                (int)count, u_errorName(status));
+    }
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0x0640, codes, LENGTHOF(codes), &status);
+    if(!(U_SUCCESS(status) && count==3 &&
+            codes[0]==USCRIPT_ARABIC && codes[1]==USCRIPT_SYRIAC && codes[2]==USCRIPT_MANDAIC)) {
+        log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
+                (int)count, u_errorName(status));
+    }
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0xfdf2, codes, LENGTHOF(codes), &status);
+    if(!(U_SUCCESS(status) && count==2 && codes[0]==USCRIPT_ARABIC && codes[1]==USCRIPT_THAANA)) {
+        log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
+                (int)count, u_errorName(status));
+    }
+    /* U+FF65: only the first and the last of the six expected entries are checked. */
+    status=U_ZERO_ERROR;
+    count=uscript_getScriptExtensions(0xff65, codes, LENGTHOF(codes), &status);
+    if(!(U_SUCCESS(status) && count==6 && codes[0]==USCRIPT_BOPOMOFO && codes[5]==USCRIPT_YI)) {
+        log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
+                (int)count, u_errorName(status));
+    }
+}
+
+/*
+ * Smoke test for the script metadata functions: uscript_getSampleString(),
+ * uscript_getUsage(), uscript_isRightToLeft(), uscript_breaksBetweenLetters()
+ * and uscript_isCased(). Each group of expectations logs one error on mismatch.
+ */
+void TestScriptMetadataAPI() {
+    /* API & code coverage. More testing in intltest/ucdtest.cpp. */
+    UErrorCode errorCode=U_ZERO_ERROR;
+    UChar sample[8];
+
+    /* Latin: one sample character, NUL-terminated, whose script is again Latin. */
+    if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
+            U_FAILURE(errorCode) ||
+            uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
+            sample[1]!=0) {
+        log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
+    }
+    /*
+     * Reset before each independent check so that a failure left behind by the
+     * previous one cannot cascade into a second, misleading error.
+     */
+    errorCode=U_ZERO_ERROR;
+    /* capacity=0: the length is still reported, overflow is flagged, the buffer stays untouched. */
+    sample[0]=0xfffe;
+    if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
+            errorCode!=U_BUFFER_OVERFLOW_ERROR ||
+            sample[0]!=0xfffe) {
+        log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
+    }
+    /* Invalid script code: empty, NUL-terminated result without a failure. */
+    errorCode=U_ZERO_ERROR;
+    if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
+            U_FAILURE(errorCode) ||
+            sample[0]!=0) {
+        log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
+    }
+    /* Out-of-range code with capacity=0: empty result, "not terminated" warning, buffer untouched. */
+    errorCode=U_ZERO_ERROR;
+    sample[0]=0xfffe;
+    if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
+            errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
+            sample[0]!=0xfffe) {
+        log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
+    }
+
+    /* One script per usage class, plus the two out-of-range codes. */
+    if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
+            uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
+            uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
+            uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
+            uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
+            uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
+            uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
+        log_err("uscript_getUsage() failed\n");
+    }
+
+    /* Expected: Latin and Cirth are not right-to-left; Arabic and Hebrew are. */
+    if(uscript_isRightToLeft(USCRIPT_LATIN) ||
+            uscript_isRightToLeft(USCRIPT_CIRTH) ||
+            !uscript_isRightToLeft(USCRIPT_ARABIC) ||
+            !uscript_isRightToLeft(USCRIPT_HEBREW)) {
+        log_err("uscript_isRightToLeft() failed\n");
+    }
- }
+    /* Expected: no breaks between letters for Latin and Cirth; breaks for Han and Thai. */
+    if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
+            uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
+            !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
+            !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
+        log_err("uscript_breaksBetweenLetters() failed\n");
+    }
+
+    /* Expected: Cirth and Han are not cased; Latin and Greek are. */
+    if(uscript_isCased(USCRIPT_CIRTH) ||
+            uscript_isCased(USCRIPT_HAN) ||
+            !uscript_isCased(USCRIPT_LATIN) ||
+            !uscript_isCased(USCRIPT_GREEK)) {
+        log_err("uscript_isCased() failed\n");
+    }
+}
+
+/*
+ * Unicode 5.1 explicitly defines binary property value aliases.
+ * Looks each alias up for UCHAR_ALPHABETIC and reports, as a data error,
+ * any alias that does not map to the expected FALSE/TRUE value.
+ */
+void TestBinaryValues() {
+    static const char *const noAliases[]={ "N", "No", "F", "False" };
+    static const char *const yesAliases[]={ "Y", "Yes", "T", "True" };
+    int32_t k;
+
+    /* All spellings of "false" first ... */
+    for(k=0; k<LENGTHOF(noAliases); ++k) {
+        const char *alias=noAliases[k];
+        if(u_getPropertyValueEnum(UCHAR_ALPHABETIC, alias)!=FALSE) {
+            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", alias);
+        }
+    }
+    /* ... then all spellings of "true". */
+    for(k=0; k<LENGTHOF(yesAliases); ++k) {
+        const char *alias=yesAliases[k];
+        if(u_getPropertyValueEnum(UCHAR_ALPHABETIC, alias)!=TRUE) {
+            log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", alias);
+        }
+    }
+}