X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..1a147d096ae81f4c8262f7bfc56bd19fc2dee932:/icuSources/test/cintltst/cstrcase.c diff --git a/icuSources/test/cintltst/cstrcase.c b/icuSources/test/cintltst/cstrcase.c index cdbb8535..e526b54f 100644 --- a/icuSources/test/cintltst/cstrcase.c +++ b/icuSources/test/cintltst/cstrcase.c @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2002-2006, International Business Machines +* Copyright (C) 2002-2016, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: cstrcase.c -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -25,6 +27,8 @@ #include "unicode/ucasemap.h" #include "cmemory.h" #include "cintltst.h" +#include "ucasemap_imp.h" +#include "ustr_imp.h" /* test string case mapping functions --------------------------------------- */ @@ -43,12 +47,12 @@ TestCaseLower(void) { /* lowercase with root locale and separate buffers */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, + length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), + beforeLower, UPRV_LENGTHOF(beforeLower), "", &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(lowerRoot)) || uprv_memcmp(lowerRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -63,14 +67,14 @@ buffer[length]==0 ? 
"yes" : "no", /* lowercase with turkish locale and in the same buffer */ uprv_memcpy(buffer, beforeLower, sizeof(beforeLower)); - buffer[sizeof(beforeLower)/U_SIZEOF_UCHAR]=0; + buffer[UPRV_LENGTHOF(beforeLower)]=0; errorCode=U_ZERO_ERROR; - length=u_strToLower(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, + length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), buffer, -1, /* implicit srcLength */ "tr", &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(lowerTurkish)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(lowerTurkish)) || uprv_memcmp(lowerTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -84,11 +88,11 @@ buffer[length]==0 ? "yes" : "no", buffer[0]=buffer[2]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, 2, /* set destCapacity=2 */ - beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, + beforeLower, UPRV_LENGTHOF(beforeLower), "", &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || - length!=(sizeof(lowerRoot)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(lowerRoot)) || uprv_memcmp(lowerRoot, buffer, 2*U_SIZEOF_UCHAR)!=0 || buffer[2]!=0xabcd ) { @@ -100,8 +104,8 @@ buffer[length]==0 ? "yes" : "no", /* test error handling */ errorCode=U_ZERO_ERROR; - length=u_strToLower(NULL, sizeof(buffer)/U_SIZEOF_UCHAR, - beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, + length=u_strToLower(NULL, UPRV_LENGTHOF(buffer), + beforeLower, UPRV_LENGTHOF(beforeLower), "", &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { @@ -113,7 +117,7 @@ buffer[length]==0 ? 
"yes" : "no", buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strToLower(buffer, -1, - beforeLower, sizeof(beforeLower)/U_SIZEOF_UCHAR, + beforeLower, UPRV_LENGTHOF(beforeLower), "", &errorCode); if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || @@ -141,12 +145,12 @@ TestCaseUpper(void) { /* uppercase with root locale and in the same buffer */ uprv_memcpy(buffer, beforeUpper, sizeof(beforeUpper)); errorCode=U_ZERO_ERROR; - length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - buffer, sizeof(beforeUpper)/U_SIZEOF_UCHAR, + length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), + buffer, UPRV_LENGTHOF(beforeUpper), "", &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(upperRoot)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(upperRoot)) || uprv_memcmp(upperRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -159,12 +163,12 @@ TestCaseUpper(void) { /* uppercase with turkish locale and separate buffers */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - beforeUpper, sizeof(beforeUpper)/U_SIZEOF_UCHAR, + length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), + beforeUpper, UPRV_LENGTHOF(beforeUpper), "tr", &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(upperTurkish)) || uprv_memcmp(upperTurkish, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -177,11 +181,11 @@ TestCaseUpper(void) { /* test preflighting */ errorCode=U_ZERO_ERROR; length=u_strToUpper(NULL, 0, - beforeUpper, sizeof(beforeUpper)/U_SIZEOF_UCHAR, + beforeUpper, UPRV_LENGTHOF(beforeUpper), "tr", &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || - length!=(sizeof(upperTurkish)/U_SIZEOF_UCHAR) + length!=(UPRV_LENGTHOF(upperTurkish)) ) { log_err("error in u_strToUpper(turkish locale pure preflighting)=%ld error=%s\n", length, @@ -191,8 +195,8 @@ TestCaseUpper(void) { /* test error handling */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - 
length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - NULL, sizeof(beforeUpper)/U_SIZEOF_UCHAR, + length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), + NULL, UPRV_LENGTHOF(beforeUpper), "tr", &errorCode); if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || @@ -206,7 +210,7 @@ TestCaseUpper(void) { buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strToUpper(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, + length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), beforeUpper, -2, "tr", &errorCode); @@ -236,21 +240,21 @@ TestCaseTitle(void) { UErrorCode errorCode; errorCode=U_ZERO_ERROR; - titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, &errorCode); + titleIterChars=ubrk_open(UBRK_CHARACTER, "", beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); if(U_FAILURE(errorCode)) { - log_err("error: ubrk_open(UBRK_CHARACTER)->%s\n", u_errorName(errorCode)); + log_err_status(errorCode, "error: ubrk_open(UBRK_CHARACTER)->%s\n", u_errorName(errorCode)); return; } /* titlecase with standard break iterator and in the same buffer */ uprv_memcpy(buffer, beforeTitle, sizeof(beforeTitle)); errorCode=U_ZERO_ERROR; - length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - buffer, sizeof(beforeTitle)/U_SIZEOF_UCHAR, + length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), + buffer, UPRV_LENGTHOF(beforeTitle), NULL, "", &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(titleWord)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(titleWord)) || uprv_memcmp(titleWord, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -263,12 +267,12 @@ TestCaseTitle(void) { /* titlecase with UBRK_CHARACTERS and separate buffers */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, + length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), + beforeTitle, UPRV_LENGTHOF(beforeTitle), titleIterChars, "", &errorCode); if( U_FAILURE(errorCode) || - 
length!=(sizeof(titleChar)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(titleChar)) || uprv_memcmp(titleChar, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -281,11 +285,11 @@ TestCaseTitle(void) { /* test preflighting */ errorCode=U_ZERO_ERROR; length=u_strToTitle(NULL, 0, - beforeTitle, sizeof(beforeTitle)/U_SIZEOF_UCHAR, + beforeTitle, UPRV_LENGTHOF(beforeTitle), titleIterChars, "", &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || - length!=(sizeof(titleChar)/U_SIZEOF_UCHAR) + length!=(UPRV_LENGTHOF(titleChar)) ) { log_err("error in u_strToTitle(UBRK_CHARACTERS pure preflighting)=%ld error=%s\n", length, @@ -295,8 +299,8 @@ TestCaseTitle(void) { /* test error handling */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - NULL, sizeof(beforeTitle)/U_SIZEOF_UCHAR, + length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), + NULL, UPRV_LENGTHOF(beforeTitle), titleIterChars, "", &errorCode); if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || @@ -310,7 +314,7 @@ TestCaseTitle(void) { buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strToTitle(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, + length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), beforeTitle, -2, titleIterChars, "", &errorCode); @@ -326,6 +330,68 @@ TestCaseTitle(void) { ubrk_close(titleIterChars); } +static void +TestCaseDutchTitle(void) { + static const UChar + + beforeTitle[]= { 0x69, 0x6A, 0x73, 0x73, 0x45, 0x6c, 0x20, 0x69, 0x67, 0x6c, 0x4f, 0x6f , 0x20 , 0x49, 0x4A, 0x53, 0x53, 0x45, 0x4C }, + titleRoot[]= { 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x6A, 0x73, 0x73, 0x65, 0x6C }, + titleDutch[]= { 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6c, 0x20, 0x49, 0x67, 0x6c, 0x6f, 0x6f , 0x20 , 0x49, 0x4A, 0x73, 0x73, 0x65, 0x6C }; + + UChar buffer[32]; + UBreakIterator *titleIterWord; + int32_t length; + UErrorCode errorCode; + + errorCode=U_ZERO_ERROR; + titleIterWord=ubrk_open(UBRK_WORD, "", beforeTitle, 
UPRV_LENGTHOF(beforeTitle), &errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "error: ubrk_open(UBRK_WORD)->%s\n", u_errorName(errorCode)); + return; + } + + /* titlecase with the root locale (""): expected result is titleRoot, where a leading "ij" becomes "Ij" */ + buffer[0]=0xabcd; + errorCode=U_ZERO_ERROR; + length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), + beforeTitle, UPRV_LENGTHOF(beforeTitle), + titleIterWord, "", + &errorCode); + if( U_FAILURE(errorCode) || + length!=(UPRV_LENGTHOF(titleRoot)) || + uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)!=0 || + buffer[length]!=0 + ) { + char charsOut[21]; + u_UCharsToChars(buffer,charsOut,sizeof(charsOut)-1); charsOut[sizeof(charsOut)-1]=0; /* always NUL-terminate before printing with %s */ + log_err("error in u_strToTitle(UBRK_WORD)=%ld error=%s root locale string matches: %s\noutput buffer is {%s}\n", + (long)length, + u_errorName(errorCode), + uprv_memcmp(titleRoot, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ? "yes" : "no", charsOut); + } + /* titlecase with the Dutch locale ("nl"): expected result is titleDutch, where a leading "ij" becomes "IJ" */ + buffer[0]=0xabcd; + errorCode=U_ZERO_ERROR; + length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), + beforeTitle, UPRV_LENGTHOF(beforeTitle), + titleIterWord, "nl", + &errorCode); + if( U_FAILURE(errorCode) || + length!=(UPRV_LENGTHOF(titleDutch)) || + uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)!=0 || + buffer[length]!=0 + ) { + char charsOut[21]; + u_UCharsToChars(buffer,charsOut,sizeof(charsOut)-1); charsOut[sizeof(charsOut)-1]=0; /* always NUL-terminate before printing with %s */ + log_err("error in u_strToTitle(UBRK_WORD)=%ld error=%s dutch locale string matches: %s\noutput buffer is {%s}\n", + (long)length, + u_errorName(errorCode), + uprv_memcmp(titleDutch, buffer, length*U_SIZEOF_UCHAR)==0 && buffer[length]==0 ?
"yes" : "no", charsOut); + } + + ubrk_close(titleIterWord); +} + #endif /* test case folding and case-insensitive string compare -------------------- */ @@ -393,12 +459,12 @@ TestCaseFolding(void) { /* test full string case folding with default option and separate buffers */ buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - mixed, sizeof(mixed)/U_SIZEOF_UCHAR, + length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), + mixed, UPRV_LENGTHOF(mixed), U_FOLD_CASE_DEFAULT, &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(foldedDefault)) || uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -412,12 +478,12 @@ TestCaseFolding(void) { if(isUnicode_3_1) { buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - mixed, sizeof(mixed)/U_SIZEOF_UCHAR, + length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), + mixed, UPRV_LENGTHOF(mixed), U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(foldedExcludeSpecialI)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(foldedExcludeSpecialI)) || uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -430,14 +496,14 @@ TestCaseFolding(void) { /* test full string case folding with default option and in the same buffer */ uprv_memcpy(buffer, mixed, sizeof(mixed)); - buffer[sizeof(mixed)/U_SIZEOF_UCHAR]=0; + buffer[UPRV_LENGTHOF(mixed)]=0; errorCode=U_ZERO_ERROR; - length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, + length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), buffer, -1, /* implicit srcLength */ U_FOLD_CASE_DEFAULT, &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) || + length!=(UPRV_LENGTHOF(foldedDefault)) || uprv_memcmp(foldedDefault, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -451,12 
+517,12 @@ TestCaseFolding(void) { if(isUnicode_3_1) { uprv_memcpy(buffer, mixed, sizeof(mixed)); errorCode=U_ZERO_ERROR; - length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - buffer, sizeof(mixed)/U_SIZEOF_UCHAR, + length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), + buffer, UPRV_LENGTHOF(mixed), U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); if( U_FAILURE(errorCode) || - length!=(sizeof(foldedExcludeSpecialI)/U_SIZEOF_UCHAR) || + length!=UPRV_LENGTHOF(foldedExcludeSpecialI) || uprv_memcmp(foldedExcludeSpecialI, buffer, length*U_SIZEOF_UCHAR)!=0 || buffer[length]!=0 ) { @@ -471,11 +537,11 @@ TestCaseFolding(void) { buffer[0]=buffer[2]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strFoldCase(buffer, 2, /* set destCapacity=2 */ - mixed, sizeof(mixed)/U_SIZEOF_UCHAR, + mixed, UPRV_LENGTHOF(mixed), U_FOLD_CASE_DEFAULT, &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || - length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) || + length!=UPRV_LENGTHOF(foldedDefault) || uprv_memcmp(foldedDefault, buffer, 2*U_SIZEOF_UCHAR)!=0 || buffer[2]!=0xabcd ) { @@ -487,11 +553,11 @@ TestCaseFolding(void) { errorCode=U_ZERO_ERROR; length=u_strFoldCase(NULL, 0, - mixed, sizeof(mixed)/U_SIZEOF_UCHAR, + mixed, UPRV_LENGTHOF(mixed), U_FOLD_CASE_DEFAULT, &errorCode); if( errorCode!=U_BUFFER_OVERFLOW_ERROR || - length!=(sizeof(foldedDefault)/U_SIZEOF_UCHAR) + length!=UPRV_LENGTHOF(foldedDefault) ) { log_err("error in u_strFoldCase(default pure preflighting)=%ld error=%s\n", length, @@ -500,8 +566,8 @@ TestCaseFolding(void) { /* test error handling */ errorCode=U_ZERO_ERROR; - length=u_strFoldCase(NULL, sizeof(buffer)/U_SIZEOF_UCHAR, - mixed, sizeof(mixed)/U_SIZEOF_UCHAR, + length=u_strFoldCase(NULL, UPRV_LENGTHOF(buffer), + mixed, UPRV_LENGTHOF(mixed), U_FOLD_CASE_DEFAULT, &errorCode); if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { @@ -513,7 +579,7 @@ TestCaseFolding(void) { buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; length=u_strFoldCase(buffer, -1, - mixed, sizeof(mixed)/U_SIZEOF_UCHAR, + 
mixed, UPRV_LENGTHOF(mixed), U_FOLD_CASE_DEFAULT, &errorCode); if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || @@ -527,8 +593,8 @@ TestCaseFolding(void) { buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, - NULL, sizeof(mixed)/U_SIZEOF_UCHAR, + length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), + NULL, UPRV_LENGTHOF(mixed), U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); if( errorCode!=U_ILLEGAL_ARGUMENT_ERROR || @@ -542,7 +608,7 @@ TestCaseFolding(void) { buffer[0]=0xabcd; errorCode=U_ZERO_ERROR; - length=u_strFoldCase(buffer, sizeof(buffer)/U_SIZEOF_UCHAR, + length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), mixed, -2, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); @@ -575,12 +641,14 @@ TestCaseCompare(void) { lenMixed=u_strlen(mixed); lenOtherDefault=u_strlen(otherDefault); + (void)lenOtherDefault; /* Suppress set but not used warning. */ lenOtherExcludeSpecialI=u_strlen(otherExcludeSpecialI); lenDifferent=u_strlen(different); /* if unicodeVersion()>=3.1 then test exclude-special-i cases as well */ u_getUnicodeVersion(unicodeVersion); isUnicode_3_1= uprv_memcmp(unicodeVersion, unicode_3_1, 4)>=0; + (void)isUnicode_3_1; /* Suppress set but not used warning. 
*/ /* test u_strcasecmp() */ result=u_strcasecmp(mixed, otherDefault, U_FOLD_CASE_DEFAULT); @@ -677,11 +745,15 @@ TestUCaseMap(void) { if(0!=strcmp(locale, "tr")) { log_err("ucasemap_getLocale(ucasemap_open(\"tur\"))==%s!=\"tr\"\n", locale); } - /* overly long locale IDs get truncated to their language code to avoid unnecessary allocation */ + /* overly long locale IDs may get truncated to their language code to avoid unnecessary allocation */ ucasemap_setLocale(csm, "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog", &errorCode); locale=ucasemap_getLocale(csm); - if(0!=strcmp(locale, "i-klingon")) { - log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s!=\"i-klingon\"\n", locale); + // "I-kLInGOn-the-quick-brown-fox-jumps-over-the-lazy-dog" is canonicalized + // into "tlh-the-quick-brown-fox-jumps-over-the-lazy-dog" + // and "the" will be treated as an extlang which replaces "tlh". + if(0!=strncmp(locale, "the", 3)) { + log_err("ucasemap_getLocale(ucasemap_setLocale(\"I-kLInGOn-the-quick-br...\"))==%s\n" + " does not start with \"the\"\n", locale); } errorCode=U_ZERO_ERROR; @@ -776,19 +848,213 @@ TestUCaseMap(void) { log_err("ucasemap_utf8ToUpper(overflow) failed\n"); } + /* C API coverage for case folding. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */ + errorCode=U_ZERO_ERROR; + utf8Out[0]=0; + length=ucasemap_utf8FoldCase(csm, utf8Out, (int32_t)sizeof(utf8Out), aBc, 3, &errorCode); + if(U_FAILURE(errorCode) || length!=3 || 0!=strcmp(abc, utf8Out)) { + log_err("ucasemap_utf8FoldCase(aBc) failed\n"); + } + + ucasemap_close(csm); +} + +#if !UCONFIG_NO_BREAK_ITERATION + +/* Try titlecasing with options. */ +static void +TestUCaseMapToTitle(void) { + /* "a 'CaT. A 'dOg! 'eTc." where '=U+02BB */ + /* + * Note: The sentence BreakIterator does not recognize a '.' + * as a sentence terminator if it is followed by lowercase. + * That is why the example has the '!'. 
+ */ + static const UChar + + beforeTitle[]= { 0x61, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x54, 0x63, 0x2e }, + titleWord[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x44, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x74, 0x63, 0x2e }, + titleWordNoAdjust[]={ 0x41, 0x20, 0x2bb, 0x63, 0x61, 0x74, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x6f, 0x67, 0x21, 0x20, 0x2bb, 0x65, 0x74, 0x63, 0x2e }, + titleSentNoLower[]= { 0x41, 0x20, 0x2bb, 0x43, 0x61, 0x54, 0x2e, 0x20, 0x41, 0x20, 0x2bb, 0x64, 0x4f, 0x67, 0x21, 0x20, 0x2bb, 0x45, 0x54, 0x63, 0x2e }; + + UChar buffer[32]; + UCaseMap *csm; + UBreakIterator *sentenceIter; + const UBreakIterator *iter; + int32_t length; + UErrorCode errorCode; + + errorCode=U_ZERO_ERROR; + csm=ucasemap_open("", 0, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("ucasemap_open(\"\") failed - %s\n", u_errorName(errorCode)); + return; + } + + iter=ucasemap_getBreakIterator(csm); + if(iter!=NULL) { + log_err("ucasemap_getBreakIterator() returns %p!=NULL before setting any iterator or titlecasing\n", iter); + } + + /* Use default UBreakIterator: Word breaks. */ + length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); + if( U_FAILURE(errorCode) || + length!=UPRV_LENGTHOF(titleWord) || + 0!=u_memcmp(buffer, titleWord, length) || + buffer[length]!=0 + ) { + log_err_status(errorCode, "ucasemap_toTitle(default iterator)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); + } + if (U_SUCCESS(errorCode)) { + iter=ucasemap_getBreakIterator(csm); + if(iter==NULL) { + log_err("ucasemap_getBreakIterator() returns NULL after titlecasing\n"); + } + } + + /* Try U_TITLECASE_NO_BREAK_ADJUSTMENT. 
*/ + ucasemap_setOptions(csm, U_TITLECASE_NO_BREAK_ADJUSTMENT, &errorCode); + if(U_FAILURE(errorCode)) { + log_err_status(errorCode, "error: ucasemap_setOptions(U_TITLECASE_NO_BREAK_ADJUSTMENT) failed - %s\n", u_errorName(errorCode)); + ucasemap_close(csm); return; /* release the case map on this early exit too */ + } + + length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); + if( U_FAILURE(errorCode) || + length!=UPRV_LENGTHOF(titleWordNoAdjust) || + 0!=u_memcmp(buffer, titleWordNoAdjust, length) || + buffer[length]!=0 + ) { + log_err("ucasemap_toTitle(default iterator, no break adjustment)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); + } + + /* Set a sentence break iterator. */ + errorCode=U_ZERO_ERROR; + sentenceIter=ubrk_open(UBRK_SENTENCE, "", NULL, 0, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: ubrk_open(UBRK_SENTENCE) failed - %s\n", u_errorName(errorCode)); + ucasemap_close(csm); + return; + } + ucasemap_setBreakIterator(csm, sentenceIter, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: ucasemap_setBreakIterator(sentence iterator) failed - %s\n", u_errorName(errorCode)); + ubrk_close(sentenceIter); + ucasemap_close(csm); + return; + } + iter=ucasemap_getBreakIterator(csm); + if(iter!=sentenceIter) { + log_err("ucasemap_getBreakIterator() returns %p!=%p after setting the iterator\n", (const void *)iter, (const void *)sentenceIter); + } + + ucasemap_setOptions(csm, U_TITLECASE_NO_LOWERCASE, &errorCode); + if(U_FAILURE(errorCode)) { + log_err("error: ucasemap_setOptions(U_TITLECASE_NO_LOWERCASE) failed - %s\n", u_errorName(errorCode)); + ucasemap_close(csm); return; /* csm adopted sentenceIter in ucasemap_setBreakIterator(), so closing csm releases both */ + } + + /* Use the sentence break iterator with the option. Preflight first.
*/ + length=ucasemap_toTitle(csm, NULL, 0, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); + if( errorCode!=U_BUFFER_OVERFLOW_ERROR || + length!=UPRV_LENGTHOF(titleSentNoLower) + ) { + log_err("ucasemap_toTitle(preflight sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); + } + + errorCode=U_ZERO_ERROR; + buffer[0]=0; + length=ucasemap_toTitle(csm, buffer, UPRV_LENGTHOF(buffer), beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); + if( U_FAILURE(errorCode) || + length!=UPRV_LENGTHOF(titleSentNoLower) || + 0!=u_memcmp(buffer, titleSentNoLower, length) || + buffer[length]!=0 + ) { + log_err("ucasemap_toTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); + } + + /* UTF-8 C API coverage. More thorough test via C++ intltest's StringCaseTest::TestCasing(). */ + { + char utf8BeforeTitle[64], utf8TitleSentNoLower[64], utf8[64]; + int32_t utf8BeforeTitleLength, utf8TitleSentNoLowerLength; + + errorCode=U_ZERO_ERROR; + u_strToUTF8(utf8BeforeTitle, (int32_t)sizeof(utf8BeforeTitle), &utf8BeforeTitleLength, beforeTitle, UPRV_LENGTHOF(beforeTitle), &errorCode); + u_strToUTF8(utf8TitleSentNoLower, (int32_t)sizeof(utf8TitleSentNoLower), &utf8TitleSentNoLowerLength, titleSentNoLower, UPRV_LENGTHOF(titleSentNoLower), &errorCode); + + length=ucasemap_utf8ToTitle(csm, utf8, (int32_t)sizeof(utf8), utf8BeforeTitle, utf8BeforeTitleLength, &errorCode); + if( U_FAILURE(errorCode) || + length!=utf8TitleSentNoLowerLength || + 0!=uprv_memcmp(utf8, utf8TitleSentNoLower, length) || + utf8[length]!=0 + ) { + log_err("ucasemap_utf8ToTitle(sentence break iterator, no lowercasing)=%ld failed - %s\n", (long)length, u_errorName(errorCode)); + } + } + ucasemap_close(csm); } +#endif + +/* Test case for internal API u_caseInsensitivePrefixMatch */ +static void +TestUCaseInsensitivePrefixMatch(void) { + struct { + const char *s1; + const char *s2; + int32_t r1; + int32_t r2; + } testCases[] = { + 
{"ABC", "ab", 2, 2}, + {"ABCD", "abcx", 3, 3}, + {"ABC", "xyz", 0, 0}, + /* U+00DF LATIN SMALL LETTER SHARP S */ + {"A\\u00dfBC", "Ass", 2, 3}, + {"Fust", "Fu\\u00dfball", 2, 2}, + {"\\u00dfsA", "s\\u00dfB", 2, 2}, + {"\\u00dfs", "s\\u00df", 2, 2}, + /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE */ + {"XYZ\\u0130i\\u0307xxx", "xyzi\\u0307\\u0130yyy", 6, 6}, + {0, 0, 0, 0} + }; + int32_t i; + + for (i = 0; testCases[i].s1 != 0; i++) { /* the all-zero entry terminates the table */ + UErrorCode sts = U_ZERO_ERROR; + UChar u1[64], u2[64]; + int32_t matchLen1, matchLen2; + + u_unescape(testCases[i].s1, u1, 64); + u_unescape(testCases[i].s2, u2, 64); + + u_caseInsensitivePrefixMatch(u1, -1, u2, -1, 0, &matchLen1, &matchLen2, &sts); + if (U_FAILURE(sts)) { + log_err("error: %s, s1=%s, s2=%s\n", u_errorName(sts), testCases[i].s1, testCases[i].s2); + } else if (matchLen1 != testCases[i].r1 || matchLen2 != testCases[i].r2) { /* r1/r2 are the expected match lengths for s1/s2 */ + log_err("s1=%s, s2=%s / match len1=%d, len2=%d / expected len1=%d, len2=%d\n", + testCases[i].s1, testCases[i].s2, + matchLen1, matchLen2, + testCases[i].r1, testCases[i].r2); + } + } +} + void addCaseTest(TestNode** root); void addCaseTest(TestNode** root) { /* cstrcase.c functions, declared in cucdtst.h */ addTest(root, &TestCaseLower, "tsutil/cstrcase/TestCaseLower"); addTest(root, &TestCaseUpper, "tsutil/cstrcase/TestCaseUpper"); -#if !UCONFIG_NO_BREAK_ITERATION +#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO addTest(root, &TestCaseTitle, "tsutil/cstrcase/TestCaseTitle"); + addTest(root, &TestCaseDutchTitle, "tsutil/cstrcase/TestCaseDutchTitle"); #endif addTest(root, &TestCaseFolding, "tsutil/cstrcase/TestCaseFolding"); addTest(root, &TestCaseCompare, "tsutil/cstrcase/TestCaseCompare"); addTest(root, &TestUCaseMap, "tsutil/cstrcase/TestUCaseMap"); +#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO + addTest(root, &TestUCaseMapToTitle, "tsutil/cstrcase/TestUCaseMapToTitle"); +#endif + addTest(root, &TestUCaseInsensitivePrefixMatch, "tsutil/cstrcase/TestUCaseInsensitivePrefixMatch"); }