X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..151279e3792e85d0417b499c229886b3af724f55:/icuSources/test/intltest/rbbiapts.cpp diff --git a/icuSources/test/intltest/rbbiapts.cpp b/icuSources/test/intltest/rbbiapts.cpp index 54f4db99..b61f060c 100644 --- a/icuSources/test/intltest/rbbiapts.cpp +++ b/icuSources/test/intltest/rbbiapts.cpp @@ -1,13 +1,11 @@ /******************************************************************** - * COPYRIGHT: - * Copyright (c) 1999-2006, International Business Machines Corporation and - * others. All Rights Reserved. + * Copyright (c) 1999-2012, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************** + * Date Name Description + * 12/14/99 Madhu Creation. + * 01/12/2000 Madhu updated for changed API ********************************************************************/ -/************************************************************************ -* Date Name Description -* 12/14/99 Madhu Creation. -* 01/12/2000 Madhu updated for changed API -************************************************************************/ #include "unicode/utypes.h" @@ -21,8 +19,10 @@ #include "rbbidata.h" #include "cstring.h" #include "ubrkimpl.h" +#include "unicode/locid.h" #include "unicode/ustring.h" #include "unicode/utext.h" +#include "cmemory.h" /** * API Test the RuleBasedBreakIterator class @@ -30,10 +30,10 @@ #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ -errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} +dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} -#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ -errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} +#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ + errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} void RBBIAPITest::TestCloneEquals() { @@ -44,7 +44,7 @@ void RBBIAPITest::TestCloneEquals() RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); return; } @@ -70,7 +70,7 @@ void RBBIAPITest::TestCloneEquals() errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); - // Quick test of RulesBasedBreakIterator assignment - + // Quick test of RulesBasedBreakIterator assignment - // Check that // two different iterators are != // they are == after assignment @@ -79,7 +79,7 @@ void RBBIAPITest::TestCloneEquals() logln("Testing assignment"); RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); return; } @@ -124,16 +124,16 @@ void RBBIAPITest::TestCloneEquals() RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); - if(*bi1clone != *bi1 || *bi1clone != *biequal || + if(*bi1clone != *bi1 || *bi1clone != *biequal || *bi1clone == *bi3 || *bi1clone == *bi2) errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); - if(*bi2clone == *bi1 || *bi2clone == *biequal || + if(*bi2clone == *bi1 || *bi2clone == *biequal || *bi2clone == *bi3 || *bi2clone != *bi2) errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); if(bi1->getText() != bi1clone->getText() || - bi2clone->getText() != bi2->getText() || + bi2clone->getText() != bi2->getText() || *bi2clone == *bi1clone ) errln((UnicodeString)"ERROR: RBBI's clone() method failed"); @@ -151,16 +151,19 @@ void RBBIAPITest::TestBoilerPlate() BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); if (U_FAILURE(status)) { - errln("Creation of break iterator failed %s", u_errorName(status)); + errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); return; } if(*a!=*b){ errln("Failed: boilerplate method operator!= does not return correct results"); } - BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status); - if(a && c){ - if(*c==*a){ - errln("Failed: boilerplate method opertator== does not return correct results"); + // Japanese word break iterators are identical to root with + // a dictionary-based break iterator + BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); + BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status); + if(c && d){ + if(*c!=*d){ + errln("Failed: boilerplate method operator== does not return correct results"); } }else{ errln("creation of break iterator failed"); @@ -168,6 +171,7 @@ void RBBIAPITest::TestBoilerPlate() delete a; delete b; delete c; + delete d; } void RBBIAPITest::TestgetRules() @@ -177,7 +181,7 @@ void RBBIAPITest::TestgetRules() RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL: in construction"); + errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); delete bi1; delete bi2; return; @@ -208,7 +212,7 @@ void RBBIAPITest::TestHashCode() RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); delete bi1; delete bi2; delete bi3; @@ -234,7 +238,7 @@ void RBBIAPITest::TestHashCode() errln((UnicodeString)"ERROR: different objects have same hashcodes"); delete bi1clone; - delete bi2clone; + delete bi2clone; delete bi1; delete bi2; delete bi3; @@ -243,13 +247,13 @@ void RBBIAPITest::TestHashCode() void RBBIAPITest::TestGetSetAdoptText() { logln((UnicodeString)"Testing getText setText "); - UErrorCode status=U_ZERO_ERROR; + IcuTestErrorCode status(*this, "TestGetSetAdoptText"); UnicodeString str1="first string."; UnicodeString str2="Second string."; - RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); - RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); - if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + LocalPointer charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); + LocalPointer wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); + if(status.isFailure()){ + errcheckln(status, "Fail : in construction - %s", status.errorName()); return; } @@ -258,7 +262,7 @@ void RBBIAPITest::TestGetSetAdoptText() CharacterIterator* text1Clone = text1->clone(); CharacterIterator* text2= new StringCharacterIterator(str2); CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" - + wordIter1->setText(str1); CharacterIterator *tci = &wordIter1->getText(); UnicodeString tstr; @@ -284,7 +288,7 @@ void RBBIAPITest::TestGetSetAdoptText() TEST_ASSERT(tstr == str1); - RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone(); + LocalPointer rb((RuleBasedBreakIterator*)wordIter1->clone()); rb->adoptText(text1); if(rb->getText() != *text1) errln((UnicodeString)"ERROR:1 error in adoptText "); @@ -313,9 +317,9 @@ void RBBIAPITest::TestGetSetAdoptText() const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ // 012345678901 - status = U_ZERO_ERROR; - UText *ut = utext_openUTF8(NULL, s1, -1, &status); - wordIter1->setText(ut, status); + status.reset(); + LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); + wordIter1->setText(ut.getAlias(), status); TEST_ASSERT_SUCCESS(status); int32_t pos; @@ -330,10 +334,10 @@ void RBBIAPITest::TestGetSetAdoptText() pos = wordIter1->next(); TEST_ASSERT(pos==UBRK_DONE); - status = U_ZERO_ERROR; - UText *ut2 = utext_openUTF8(NULL, s2, -1, &status); + status.reset(); + LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); TEST_ASSERT_SUCCESS(status); - wordIter1->setText(ut2, status); + wordIter1->setText(ut2.getAlias(), status); TEST_ASSERT_SUCCESS(status); pos = wordIter1->first(); @@ -354,23 +358,15 @@ void RBBIAPITest::TestGetSetAdoptText() pos = wordIter1->previous(); TEST_ASSERT(pos==UBRK_DONE); - status = U_ZERO_ERROR; + status.reset(); UnicodeString sEmpty; - UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status); - wordIter1->getUText(gut2, status); + LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); + wordIter1->getUText(gut2.getAlias(), status); TEST_ASSERT_SUCCESS(status); - utext_close(gut2); - - utext_close(ut); - utext_close(ut2); - - delete wordIter1; - delete charIter1; - delete rb; + status.reset(); +} - } - void RBBIAPITest::TestIteration() { // This test just verifies that the API is present. @@ -379,42 +375,42 @@ void RBBIAPITest::TestIteration() UErrorCode status=U_ZERO_ERROR; RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating character break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Word break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Line break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Title break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating character break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); return; // Skip the rest of these tests. } @@ -600,10 +596,10 @@ void RBBIAPITest::TestBuilder() { int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + dataerrln("Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); doBoundaryTest(*bi, testString1, bounds1); @@ -627,10 +623,10 @@ void RBBIAPITest::TestQuoteGrouping() { int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + dataerrln("Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); doBoundaryTest(*bi, testString1, bounds1); @@ -643,28 +639,28 @@ void RBBIAPITest::TestQuoteGrouping() { // Test word break rule status constants. // void RBBIAPITest::TestRuleStatus() { - UChar str[30]; - u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", - // 012345678901234567 8 9 0 1 2 3 4 5 6 - // Ideographic Katakana Hiragana + UChar str[30]; + //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing + // changed UBRK_WORD_KANA to UBRK_WORD_IDEO + u_unescape("plain word 123.45 \\u30a1\\u30a2 ", + // 012345678901234567 8 9 0 + // Katakana str, 30); UnicodeString testString1(str); - int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; + int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, - UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, - UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA}; + UBRK_WORD_IDEO, UBRK_WORD_NONE}; int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, - UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT, - UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; + UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; UErrorCode status=U_ZERO_ERROR; - + RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); // First test that the breaks are in the right spots. @@ -683,7 +679,7 @@ void RBBIAPITest::TestRuleStatus() { errln("FAIL: incorrect tag value %d at position %d", tag, pos); break; } - + // Check that we get the same tag values from getRuleStatusVec() int32_t vec[10]; int t = bi->getRuleStatusVec(vec, 10, status); @@ -702,7 +698,7 @@ void RBBIAPITest::TestRuleStatus() { bi = (RuleBasedBreakIterator *) BreakIterator::createLineInstance(Locale::getEnglish(), status); if(U_FAILURE(status)) { - errln("failed to create word break iterator."); + errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); } else { int32_t i = 0; int32_t pos, tag; @@ -732,7 +728,7 @@ void RBBIAPITest::TestRuleStatus() { } if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || - UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) { + (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { errln("UBRK_LINE_* constants from header are inconsistent."); } } @@ -746,12 +742,12 @@ void RBBIAPITest::TestRuleStatus() { // Test the vector form of break rule status. // void RBBIAPITest::TestRuleStatusVec() { - UnicodeString rulesString = "[A-N]{100}; \n" + UnicodeString rulesString( "[A-N]{100}; \n" "[a-w]{200}; \n" "[\\p{L}]{300}; \n" "[\\p{N}]{400}; \n" "[0-5]{500}; \n" - "!.*;\n"; + "!.*;\n", -1, US_INV); UnicodeString testString1 = "Aapz5?"; int32_t statusVals[10]; int32_t numStatuses; @@ -759,10 +755,11 @@ void RBBIAPITest::TestRuleStatusVec() { UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); - TEST_ASSERT_SUCCESS(status); - if (U_SUCCESS(status)) { + if (U_FAILURE(status)) { + dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); + } else { bi->setText(testString1); // A @@ -818,7 +815,7 @@ void RBBIAPITest::TestRuleStatusVec() { TEST_ASSERT(statusVals[0] == 0); // - // Check buffer overflow error handling. Char == A + // Check buffer overflow error handling. Char == A // bi->first(); pos = bi->next(); @@ -862,10 +859,10 @@ void RBBIAPITest::TestBug2190() { int32_t bounds1[] = {0, 4, 8}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + dataerrln("Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); doBoundaryTest(*bi, testString1, bounds1); @@ -878,19 +875,31 @@ void RBBIAPITest::TestRegistration() { #if !UCONFIG_NO_SERVICE UErrorCode status = U_ZERO_ERROR; BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); - // ok to not delete these if we exit because of error? BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); BreakIterator* root_word = BreakIterator::createWordInstance("", status); BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); + if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { + dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); + + delete ja_word; + delete ja_char; + delete root_word; + delete root_char; + + return; + } + URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); { +#if 0 // With a dictionary based word breaking, ja_word is identical to root. if (ja_word && *ja_word == *root_word) { errln("japan not different from root"); } +#endif } - + { BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); UBool fail = TRUE; @@ -902,7 +911,7 @@ void RBBIAPITest::TestRegistration() { errln("bad result for xx_XX/word"); } } - + { BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); UBool fail = TRUE; @@ -914,7 +923,7 @@ void RBBIAPITest::TestRegistration() { errln("bad result for ja_JP/char"); } } - + { BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); UBool fail = TRUE; @@ -926,7 +935,7 @@ void RBBIAPITest::TestRegistration() { errln("bad result for xx_XX/char"); } } - + { StringEnumeration* avail = BreakIterator::getAvailableLocales(); UBool found = FALSE; @@ -942,14 +951,14 @@ void RBBIAPITest::TestRegistration() { errln("did not find test locale"); } } - + { UBool unreg = BreakIterator::unregister(key, status); if (!unreg) { errln("unable to unregister"); } } - + { BreakIterator* result = BreakIterator::createWordInstance("en_US", status); BreakIterator* root = BreakIterator::createWordInstance("", status); @@ -963,7 +972,7 @@ void RBBIAPITest::TestRegistration() { errln("did not get root break"); } } - + { StringEnumeration* avail = BreakIterator::getAvailableLocales(); UBool found = FALSE; @@ -979,7 +988,7 @@ void RBBIAPITest::TestRegistration() { errln("found test locale"); } } - + { int32_t count; UBool foundLocale = FALSE; @@ -994,8 +1003,8 @@ void RBBIAPITest::TestRegistration() { errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); } } - - + + // ja_word was adopted by factory delete ja_char; delete root_word; @@ -1008,18 +1017,18 @@ void RBBIAPITest::RoundtripRule(const char *dataFile) { UParseError parseError; parseError.line = 0; parseError.offset = 0; - UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status); + LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); uint32_t length; const UChar *builtSource; const uint8_t *rbbiRules; const uint8_t *builtRules; if (U_FAILURE(status)) { - errln("Can't open \"%s\"", dataFile); + errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); return; } - builtRules = (const uint8_t *)udata_getMemory(data); + builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); if (U_FAILURE(status)) { @@ -1034,7 +1043,6 @@ void RBBIAPITest::RoundtripRule(const char *dataFile) { return; } delete brkItr; - udata_close(data); } void RBBIAPITest::TestRoundtripRules() { @@ -1044,11 +1052,132 @@ void RBBIAPITest::TestRoundtripRules() { RoundtripRule("line"); RoundtripRule("char"); if (!quick) { - RoundtripRule("word_ja"); RoundtripRule("word_POSIX"); } } +// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* +// (these are protected so we access them via a local class RBBIWithProtectedFunctions). +// This is just a sanity check, not a thorough test (e.g. we don't check that the +// first delete actually frees rulesCopy). +void RBBIAPITest::TestCreateFromRBBIData() { + // Get some handy RBBIData + const char *brkName = "word"; // or "sent", "line", "char", etc. + UErrorCode status = U_ZERO_ERROR; + LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); + if ( U_SUCCESS(status) ) { + const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); + uint32_t length = builtRules->fLength; + RBBIWithProtectedFunctions * brkItr; + + // Try the memory-adopting constructor, need to copy the data first + RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); + if ( rulesCopy ) { + uprv_memcpy( rulesCopy, builtRules, length ); + + brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); + if ( U_SUCCESS(status) ) { + delete brkItr; // this should free rulesCopy + } else { + errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); + status = U_ZERO_ERROR;// reset for the next test + uprv_free( rulesCopy ); + } + } + + // Now try the non-adopting constructor + brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); + if ( U_SUCCESS(status) ) { + delete brkItr; // this should NOT attempt to free builtRules + if (builtRules->fLength != length) { // sanity check + errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); + } + } else { + errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); + } + } + + // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) + // + status = U_ZERO_ERROR; + RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); + if (rb == NULL || U_FAILURE(status)) { + dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status)); + } else { + uint32_t length; + const uint8_t *rules = rb->getBinaryRules(length); + RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(*rb == *rb2); + UnicodeString words = "one two three "; + rb2->setText(words); + int wordCounter = 0; + while (rb2->next() != UBRK_DONE) { + wordCounter++; + } + TEST_ASSERT(wordCounter == 6); + + status = U_ZERO_ERROR; + RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); + TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); + + delete rb; + delete rb2; + delete rb3; + } +} + + +void RBBIAPITest::TestRefreshInputText() { + /* + * RefreshInput changes out the input of a Break Iterator without + * changing anything else in the iterator's state. Used with Java JNI, + * when Java moves the underlying string storage. This test + * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. + * The right set of boundaries should still be found. + */ + UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ + UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; + UErrorCode status = U_ZERO_ERROR; + UText ut1 = UTEXT_INITIALIZER; + UText ut2 = UTEXT_INITIALIZER; + RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); + TEST_ASSERT_SUCCESS(status); + + utext_openUChars(&ut1, testStr, -1, &status); + TEST_ASSERT_SUCCESS(status); + + if (U_SUCCESS(status)) { + bi->setText(&ut1, status); + TEST_ASSERT_SUCCESS(status); + + /* Line boundaries will occur before each letter in the original string */ + TEST_ASSERT(1 == bi->next()); + TEST_ASSERT(3 == bi->next()); + + /* Move the string, kill the original string. */ + u_strcpy(movedStr, testStr); + u_memset(testStr, 0x20, u_strlen(testStr)); + utext_openUChars(&ut2, movedStr, -1, &status); + TEST_ASSERT_SUCCESS(status); + RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(bi == returnedBI); + + /* Find the following matches, now working in the moved string. */ + TEST_ASSERT(5 == bi->next()); + TEST_ASSERT(7 == bi->next()); + TEST_ASSERT(8 == bi->next()); + TEST_ASSERT(UBRK_DONE == bi->next()); + + utext_close(&ut1); + utext_close(&ut2); + } + delete bi; + +} + + //--------------------------------------------- // runIndexedTest //--------------------------------------------- @@ -1058,19 +1187,29 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); switch (index) { // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; +#if !UCONFIG_NO_FILE_IO case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; case 4: name = "TestIteration"; if (exec) TestIteration(); break; +#else + case 0: case 1: case 2: case 3: case 4: name = "skip"; break; +#endif case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; - case 7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; - case 8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; - case 9: name = "TestBug2190"; if (exec) TestBug2190(); break; - case 10: name = "TestRegistration"; if (exec) TestRegistration(); break; - case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; + case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; + case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; +#if !UCONFIG_NO_FILE_IO + case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; + case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; + case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; + case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; +#else + case 9: case 10: case 11: case 12: case 13: name = "skip"; break; +#endif + case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; default: name = ""; break; // needed to end loop } @@ -1106,7 +1245,7 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof if(gotoffset != expectedOffset) errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); if(start <= gotoffset){ - testString.extractBetween(start, gotoffset, selected); + testString.extractBetween(start, gotoffset, selected); } else{ testString.extractBetween(gotoffset, start, selected); @@ -1117,4 +1256,18 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof logln(prettify("****selected \"" + selected + "\"")); } +//--------------------------------------------- +//RBBIWithProtectedFunctions class functions +//--------------------------------------------- + +RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) + : RuleBasedBreakIterator(data, status) +{ +} + +RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) + : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) +{ +} + #endif /* #if !UCONFIG_NO_BREAK_ITERATION */