X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..ba6d6ed23dec08b1cd5700a128c0752491c10ac9:/icuSources/test/intltest/rbbiapts.cpp diff --git a/icuSources/test/intltest/rbbiapts.cpp b/icuSources/test/intltest/rbbiapts.cpp index 2cd7d877..3704311b 100644 --- a/icuSources/test/intltest/rbbiapts.cpp +++ b/icuSources/test/intltest/rbbiapts.cpp @@ -1,13 +1,11 @@ /******************************************************************** - * COPYRIGHT: - * Copyright (c) 1999-2003, International Business Machines Corporation and - * others. All Rights Reserved. + * Copyright (c) 1999-2011, International Business Machines + * Corporation and others. All Rights Reserved. + ******************************************************************** + * Date Name Description + * 12/14/99 Madhu Creation. + * 01/12/2000 Madhu updated for changed API ********************************************************************/ -/************************************************************************ -* Date Name Description -* 12/14/99 Madhu Creation. -* 01/12/2000 Madhu updated for changed API -************************************************************************/ #include "unicode/utypes.h" @@ -20,12 +18,22 @@ #include "rbbiapts.h" #include "rbbidata.h" #include "cstring.h" +#include "ubrkimpl.h" +#include "unicode/locid.h" +#include "unicode/ustring.h" +#include "unicode/utext.h" +#include "cmemory.h" /** * API Test the RuleBasedBreakIterator class */ +#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ +dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} + +#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ + errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} void RBBIAPITest::TestCloneEquals() { @@ -36,7 +44,7 @@ void RBBIAPITest::TestCloneEquals() RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); return; } @@ -62,16 +70,16 @@ void RBBIAPITest::TestCloneEquals() errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); - // Quick test of RulesBasedBreakIterator assignment - + // Quick test of RulesBasedBreakIterator assignment - // Check that // two different iterators are != // they are == after assignment // source and dest iterator produce the same next() after assignment. // deleting one doesn't disable the other. logln("Testing assignment"); - RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); + RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); return; } @@ -116,16 +124,16 @@ void RBBIAPITest::TestCloneEquals() RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); - if(*bi1clone != *bi1 || *bi1clone != *biequal || + if(*bi1clone != *bi1 || *bi1clone != *biequal || *bi1clone == *bi3 || *bi1clone == *bi2) errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); - if(*bi2clone == *bi1 || *bi2clone == *biequal || + if(*bi2clone == *bi1 || *bi2clone == *biequal || *bi2clone == *bi3 || *bi2clone != *bi2) errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); if(bi1->getText() != bi1clone->getText() || - bi2clone->getText() != bi2->getText() || + bi2clone->getText() != bi2->getText() || *bi2clone == *bi1clone ) errln((UnicodeString)"ERROR: RBBI's clone() method failed"); @@ -140,14 +148,22 @@ void RBBIAPITest::TestCloneEquals() void RBBIAPITest::TestBoilerPlate() { UErrorCode status = U_ZERO_ERROR; - BreakIterator* a = BreakIterator::createLineInstance(Locale("hi"), status); - BreakIterator* b = BreakIterator::createLineInstance(Locale("hi_IN"),status); + BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); + BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); + if (U_FAILURE(status)) { + errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); + return; + } if(*a!=*b){ errln("Failed: boilerplate method operator!= does not return correct results"); } - BreakIterator* c = BreakIterator::createLineInstance(Locale("th"),status); - if(*c==*a){ - errln("Failed: boilerplate method opertator== does not return correct results"); + BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status); + if(a && c){ + if(*c==*a){ + errln("Failed: boilerplate method opertator== does not return correct results"); + } + }else{ + errln("creation of break iterator failed"); } delete a; delete b; @@ -161,7 +177,7 @@ void RBBIAPITest::TestgetRules() RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL: in construction"); + errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); delete bi1; delete bi2; return; @@ -192,7 +208,7 @@ void RBBIAPITest::TestHashCode() RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); delete bi1; delete bi2; delete bi3; @@ -218,7 +234,7 @@ void RBBIAPITest::TestHashCode() errln((UnicodeString)"ERROR: different objects have same hashcodes"); delete bi1clone; - delete bi2clone; + delete bi2clone; delete bi1; delete bi2; delete bi3; @@ -227,13 +243,13 @@ void RBBIAPITest::TestHashCode() void RBBIAPITest::TestGetSetAdoptText() { logln((UnicodeString)"Testing getText setText "); - UErrorCode status=U_ZERO_ERROR; + IcuTestErrorCode status(*this, "TestGetSetAdoptText"); UnicodeString str1="first string."; UnicodeString str2="Second string."; - RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); - RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); - if(U_FAILURE(status)){ - errln((UnicodeString)"FAIL : in construction"); + LocalPointer charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); + LocalPointer wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); + if(status.isFailure()){ + errcheckln(status, "Fail : in construction - %s", status.errorName()); return; } @@ -242,10 +258,12 @@ void RBBIAPITest::TestGetSetAdoptText() CharacterIterator* text1Clone = text1->clone(); CharacterIterator* text2= new StringCharacterIterator(str2); CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" - + wordIter1->setText(str1); - if(wordIter1->getText() != *text1) - errln((UnicodeString)"ERROR:1 error in setText or getText "); + CharacterIterator *tci = &wordIter1->getText(); + UnicodeString tstr; + tci->getText(tstr); + TEST_ASSERT(tstr == str1); if(wordIter1->current() != 0) errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); @@ -257,11 +275,16 @@ void RBBIAPITest::TestGetSetAdoptText() charIter1->adoptText(text1Clone); - if( wordIter1->getText() == charIter1->getText() || - wordIter1->getText() != *text2 || charIter1->getText() != *text1 ) - errln((UnicodeString)"ERROR:2 error is getText or setText()"); + TEST_ASSERT(wordIter1->getText() != charIter1->getText()); + tci = &wordIter1->getText(); + tci->getText(tstr); + TEST_ASSERT(tstr == str2); + tci = &charIter1->getText(); + tci->getText(tstr); + TEST_ASSERT(tstr == str1); - RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone(); + + LocalPointer rb((RuleBasedBreakIterator*)wordIter1->clone()); rb->adoptText(text1); if(rb->getText() != *text1) errln((UnicodeString)"ERROR:1 error in adoptText "); @@ -270,21 +293,76 @@ void RBBIAPITest::TestGetSetAdoptText() errln((UnicodeString)"ERROR:2 error in adoptText "); // Adopt where iterator range is less than the entire orignal source string. + // (With the change of the break engine to working with UText internally, + // CharacterIterators starting at positions other than zero are not supported) rb->adoptText(text3); - if(rb->preceding(2) != 3) { - errln((UnicodeString)"ERROR:3 error in adoptText "); - } - if(rb->following(11) != BreakIterator::DONE) { - errln((UnicodeString)"ERROR:4 error in adoptText "); - } - - delete wordIter1; - delete charIter1; - delete rb; + TEST_ASSERT(rb->preceding(2) == 0); + TEST_ASSERT(rb->following(11) == BreakIterator::DONE); + //if(rb->preceding(2) != 3) { + // errln((UnicodeString)"ERROR:3 error in adoptText "); + //} + //if(rb->following(11) != BreakIterator::DONE) { + // errln((UnicodeString)"ERROR:4 error in adoptText "); + //} + + // UText API + // + // Quick test to see if UText is working at all. + // + const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ + const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ + // 012345678901 + + status.reset(); + LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); + wordIter1->setText(ut.getAlias(), status); + TEST_ASSERT_SUCCESS(status); + + int32_t pos; + pos = wordIter1->first(); + TEST_ASSERT(pos==0); + pos = wordIter1->next(); + TEST_ASSERT(pos==5); + pos = wordIter1->next(); + TEST_ASSERT(pos==6); + pos = wordIter1->next(); + TEST_ASSERT(pos==11); + pos = wordIter1->next(); + TEST_ASSERT(pos==UBRK_DONE); + + status.reset(); + LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); + TEST_ASSERT_SUCCESS(status); + wordIter1->setText(ut2.getAlias(), status); + TEST_ASSERT_SUCCESS(status); + + pos = wordIter1->first(); + TEST_ASSERT(pos==0); + pos = wordIter1->next(); + TEST_ASSERT(pos==3); + pos = wordIter1->next(); + TEST_ASSERT(pos==4); + + pos = wordIter1->last(); + TEST_ASSERT(pos==6); + pos = wordIter1->previous(); + TEST_ASSERT(pos==4); + pos = wordIter1->previous(); + TEST_ASSERT(pos==3); + pos = wordIter1->previous(); + TEST_ASSERT(pos==0); + pos = wordIter1->previous(); + TEST_ASSERT(pos==UBRK_DONE); + + status.reset(); + UnicodeString sEmpty; + LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); + wordIter1->getUText(gut2.getAlias(), status); + TEST_ASSERT_SUCCESS(status); + status.reset(); +} - } - void RBBIAPITest::TestIteration() { // This test just verifies that the API is present. @@ -293,42 +371,42 @@ void RBBIAPITest::TestIteration() UErrorCode status=U_ZERO_ERROR; RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating character break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Word break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Line break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating Title break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); } delete bi; status=U_ZERO_ERROR; bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); if (U_FAILURE(status) || bi == NULL) { - errln("Failure creating character break iterator. Status = %s", u_errorName(status)); + errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); return; // Skip the rest of these tests. } @@ -514,10 +592,10 @@ void RBBIAPITest::TestBuilder() { int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + dataerrln("Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); doBoundaryTest(*bi, testString1, bounds1); @@ -541,10 +619,10 @@ void RBBIAPITest::TestQuoteGrouping() { int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + dataerrln("Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); doBoundaryTest(*bi, testString1, bounds1); @@ -553,16 +631,17 @@ void RBBIAPITest::TestQuoteGrouping() { } // -// TestWordStatus +// TestRuleStatus // Test word break rule status constants. // -void RBBIAPITest::TestWordStatus() { - - - UnicodeString testString1 = // Ideographic Katakana Hiragana - CharsToUnicodeString("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094"); - // 012345678901234567 8 9 0 1 2 3 4 5 6 - int32_t bounds1[] = { 0, 5,6, 10,11, 17,18, 19, 20,21, 23,24, 25, 26}; +void RBBIAPITest::TestRuleStatus() { + UChar str[30]; + u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", + // 012345678901234567 8 9 0 1 2 3 4 5 6 + // Ideographic Katakana Hiragana + str, 30); + UnicodeString testString1(str); + int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, @@ -574,10 +653,10 @@ void RBBIAPITest::TestWordStatus() { UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; UErrorCode status=U_ZERO_ERROR; - - RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getDefault(), status); + + RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + errcheckln(status, "Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); // First test that the breaks are in the right spots. @@ -596,12 +675,174 @@ void RBBIAPITest::TestWordStatus() { errln("FAIL: incorrect tag value %d at position %d", tag, pos); break; } + + // Check that we get the same tag values from getRuleStatusVec() + int32_t vec[10]; + int t = bi->getRuleStatusVec(vec, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(t==1); + TEST_ASSERT(vec[0] == tag); + } + } + delete bi; + + // Now test line break status. This test mostly is to confirm that the status constants + // are correctly declared in the header. + testString1 = "test line. \n"; + // break type s s h + + bi = (RuleBasedBreakIterator *) + BreakIterator::createLineInstance(Locale::getEnglish(), status); + if(U_FAILURE(status)) { + errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); + } else { + int32_t i = 0; + int32_t pos, tag; + UBool success; + + bi->setText(testString1); + pos = bi->current(); + tag = bi->getRuleStatus(); + for (i=0; i<3; i++) { + switch (i) { + case 0: + success = pos==0 && tag==UBRK_LINE_SOFT; break; + case 1: + success = pos==5 && tag==UBRK_LINE_SOFT; break; + case 2: + success = pos==12 && tag==UBRK_LINE_HARD; break; + default: + success = FALSE; break; + } + if (success == FALSE) { + errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", + i, pos, tag); + break; + } + pos = bi->next(); + tag = bi->getRuleStatus(); + } + if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || + UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || + (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { + errln("UBRK_LINE_* constants from header are inconsistent."); } } delete bi; + } +// +// TestRuleStatusVec +// Test the vector form of break rule status. +// +void RBBIAPITest::TestRuleStatusVec() { + UnicodeString rulesString( "[A-N]{100}; \n" + "[a-w]{200}; \n" + "[\\p{L}]{300}; \n" + "[\\p{N}]{400}; \n" + "[0-5]{500}; \n" + "!.*;\n", -1, US_INV); + UnicodeString testString1 = "Aapz5?"; + int32_t statusVals[10]; + int32_t numStatuses; + int32_t pos; + + UErrorCode status=U_ZERO_ERROR; + UParseError parseError; + + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); + if (U_FAILURE(status)) { + dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); + } else { + bi->setText(testString1); + + // A + pos = bi->next(); + TEST_ASSERT(pos==1); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 100); + TEST_ASSERT(statusVals[1] == 300); + + // a + pos = bi->next(); + TEST_ASSERT(pos==2); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 200); + TEST_ASSERT(statusVals[1] == 300); + + // p + pos = bi->next(); + TEST_ASSERT(pos==3); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 200); + TEST_ASSERT(statusVals[1] == 300); + + // z + pos = bi->next(); + TEST_ASSERT(pos==4); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 1); + TEST_ASSERT(statusVals[0] == 300); + + // 5 + pos = bi->next(); + TEST_ASSERT(pos==5); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 400); + TEST_ASSERT(statusVals[1] == 500); + + // ? + pos = bi->next(); + TEST_ASSERT(pos==6); + numStatuses = bi->getRuleStatusVec(statusVals, 10, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 1); + TEST_ASSERT(statusVals[0] == 0); + + // + // Check buffer overflow error handling. Char == A + // + bi->first(); + pos = bi->next(); + TEST_ASSERT(pos==1); + memset(statusVals, -1, sizeof(statusVals)); + numStatuses = bi->getRuleStatusVec(statusVals, 0, status); + TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == -1); + + status = U_ZERO_ERROR; + memset(statusVals, -1, sizeof(statusVals)); + numStatuses = bi->getRuleStatusVec(statusVals, 1, status); + TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 100); + TEST_ASSERT(statusVals[1] == -1); + + status = U_ZERO_ERROR; + memset(statusVals, -1, sizeof(statusVals)); + numStatuses = bi->getRuleStatusVec(statusVals, 2, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(numStatuses == 2); + TEST_ASSERT(statusVals[0] == 100); + TEST_ASSERT(statusVals[1] == 300); + TEST_ASSERT(statusVals[2] == -1); + } + delete bi; + +} + // // Bug 2190 Regression test. Builder crash on rule consisting of only a // $variable reference @@ -614,10 +855,10 @@ void RBBIAPITest::TestBug2190() { int32_t bounds1[] = {0, 4, 8}; UErrorCode status=U_ZERO_ERROR; UParseError parseError; - + RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); if(U_FAILURE(status)) { - errln("FAIL : in construction"); + dataerrln("Fail : in construction - %s", u_errorName(status)); } else { bi->setText(testString1); doBoundaryTest(*bi, testString1, bounds1); @@ -627,48 +868,68 @@ void RBBIAPITest::TestBug2190() { void RBBIAPITest::TestRegistration() { +#if !UCONFIG_NO_SERVICE UErrorCode status = U_ZERO_ERROR; - BreakIterator* thai_word = BreakIterator::createWordInstance("th_TH", status); - + BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); + // ok to not delete these if we exit because of error? - BreakIterator* thai_char = BreakIterator::createCharacterInstance("th_TH", status); + BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); BreakIterator* root_word = BreakIterator::createWordInstance("", status); BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); - URegistryKey key = BreakIterator::registerInstance(thai_word, "xx", UBRK_WORD, status); + if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { + dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); + delete ja_word; + delete ja_char; + delete root_word; + delete root_char; + + return; + } + + URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); { - if (*thai_word == *root_word) { - errln("thai not different from root"); + if (ja_word && *ja_word == *root_word) { + errln("japan not different from root"); } } - + { BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); - UBool fail = *result != *thai_word; + UBool fail = TRUE; + if(result){ + fail = *result != *ja_word; + } delete result; if (fail) { errln("bad result for xx_XX/word"); } } - + { - BreakIterator* result = BreakIterator::createCharacterInstance("th_TH", status); - UBool fail = *result != *thai_char; + BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); + UBool fail = TRUE; + if(result){ + fail = *result != *ja_char; + } delete result; if (fail) { - errln("bad result for th_TH/char"); + errln("bad result for ja_JP/char"); } } - + { BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); - UBool fail = *result != *root_char; + UBool fail = TRUE; + if(result){ + fail = *result != *root_char; + } delete result; if (fail) { errln("bad result for xx_XX/char"); } } - + { StringEnumeration* avail = BreakIterator::getAvailableLocales(); UBool found = FALSE; @@ -684,25 +945,28 @@ void RBBIAPITest::TestRegistration() { errln("did not find test locale"); } } - + { UBool unreg = BreakIterator::unregister(key, status); if (!unreg) { errln("unable to unregister"); } } - + { - BreakIterator* result = BreakIterator::createWordInstance("xx", status); + BreakIterator* result = BreakIterator::createWordInstance("en_US", status); BreakIterator* root = BreakIterator::createWordInstance("", status); - UBool fail = *root != *result; + UBool fail = TRUE; + if(root){ + fail = *root != *result; + } delete root; delete result; if (fail) { errln("did not get root break"); } } - + { StringEnumeration* avail = BreakIterator::getAvailableLocales(); UBool found = FALSE; @@ -718,7 +982,7 @@ void RBBIAPITest::TestRegistration() { errln("found test locale"); } } - + { int32_t count; UBool foundLocale = FALSE; @@ -733,31 +997,32 @@ void RBBIAPITest::TestRegistration() { errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); } } - - - // that_word was adopted by factory - delete thai_char; + + + // ja_word was adopted by factory + delete ja_char; delete root_word; delete root_char; +#endif } void RBBIAPITest::RoundtripRule(const char *dataFile) { UErrorCode status = U_ZERO_ERROR; UParseError parseError; - parseError.line = 0; - parseError.offset = 0; - UDataMemory *data = udata_open(NULL, "brk", dataFile, &status); + parseError.line = 0; + parseError.offset = 0; + LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); uint32_t length; const UChar *builtSource; const uint8_t *rbbiRules; const uint8_t *builtRules; if (U_FAILURE(status)) { - errln("Can't open \"%s\"", dataFile); + errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); return; } - builtRules = (const uint8_t *)udata_getMemory(data); + builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); if (U_FAILURE(status)) { @@ -772,7 +1037,6 @@ void RBBIAPITest::RoundtripRule(const char *dataFile) { return; } delete brkItr; - udata_close(data); } void RBBIAPITest::TestRoundtripRules() { @@ -782,11 +1046,133 @@ void RBBIAPITest::TestRoundtripRules() { RoundtripRule("line"); RoundtripRule("char"); if (!quick) { - RoundtripRule("word_th"); - RoundtripRule("line_th"); + RoundtripRule("word_ja"); + RoundtripRule("word_POSIX"); } } +// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* +// (these are protected so we access them via a local class RBBIWithProtectedFunctions). +// This is just a sanity check, not a thorough test (e.g. we don't check that the +// first delete actually frees rulesCopy). +void RBBIAPITest::TestCreateFromRBBIData() { + // Get some handy RBBIData + const char *brkName = "word"; // or "sent", "line", "char", etc. + UErrorCode status = U_ZERO_ERROR; + LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); + if ( U_SUCCESS(status) ) { + const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); + uint32_t length = builtRules->fLength; + RBBIWithProtectedFunctions * brkItr; + + // Try the memory-adopting constructor, need to copy the data first + RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); + if ( rulesCopy ) { + uprv_memcpy( rulesCopy, builtRules, length ); + + brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); + if ( U_SUCCESS(status) ) { + delete brkItr; // this should free rulesCopy + } else { + errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); + status = U_ZERO_ERROR;// reset for the next test + uprv_free( rulesCopy ); + } + } + + // Now try the non-adopting constructor + brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); + if ( U_SUCCESS(status) ) { + delete brkItr; // this should NOT attempt to free builtRules + if (builtRules->fLength != length) { // sanity check + errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); + } + } else { + errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); + } + } + + // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) + // + status = U_ZERO_ERROR; + RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); + if (rb == NULL || U_FAILURE(status)) { + dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status)); + } else { + uint32_t length; + const uint8_t *rules = rb->getBinaryRules(length); + RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(*rb == *rb2); + UnicodeString words = "one two three "; + rb2->setText(words); + int wordCounter = 0; + while (rb2->next() != UBRK_DONE) { + wordCounter++; + } + TEST_ASSERT(wordCounter == 6); + + status = U_ZERO_ERROR; + RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); + TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); + + delete rb; + delete rb2; + delete rb3; + } +} + + +void RBBIAPITest::TestRefreshInputText() { + /* + * RefreshInput changes out the input of a Break Iterator without + * changing anything else in the iterator's state. Used with Java JNI, + * when Java moves the underlying string storage. This test + * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. + * The right set of boundaries should still be found. + */ + UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ + UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; + UErrorCode status = U_ZERO_ERROR; + UText ut1 = UTEXT_INITIALIZER; + UText ut2 = UTEXT_INITIALIZER; + RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); + TEST_ASSERT_SUCCESS(status); + + utext_openUChars(&ut1, testStr, -1, &status); + TEST_ASSERT_SUCCESS(status); + + if (U_SUCCESS(status)) { + bi->setText(&ut1, status); + TEST_ASSERT_SUCCESS(status); + + /* Line boundaries will occur before each letter in the original string */ + TEST_ASSERT(1 == bi->next()); + TEST_ASSERT(3 == bi->next()); + + /* Move the string, kill the original string. */ + u_strcpy(movedStr, testStr); + u_memset(testStr, 0x20, u_strlen(testStr)); + utext_openUChars(&ut2, movedStr, -1, &status); + TEST_ASSERT_SUCCESS(status); + RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(bi == returnedBI); + + /* Find the following matches, now working in the moved string. */ + TEST_ASSERT(5 == bi->next()); + TEST_ASSERT(7 == bi->next()); + TEST_ASSERT(8 == bi->next()); + TEST_ASSERT(UBRK_DONE == bi->next()); + + utext_close(&ut1); + utext_close(&ut2); + } + delete bi; + +} + + //--------------------------------------------- // runIndexedTest //--------------------------------------------- @@ -796,22 +1182,31 @@ void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); switch (index) { // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; +#if !UCONFIG_NO_FILE_IO case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; case 4: name = "TestIteration"; if (exec) TestIteration(); break; - case 5: name = "extra"; break; /* Extra */ - case 6: name = "extra"; break; /* Extra */ - case 7: name = "TestBuilder"; if (exec) TestBuilder(); break; - case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; - case 9: name = "TestWordStatus"; if (exec) TestWordStatus(); break; - case 10: name = "TestBug2190"; if (exec) TestBug2190(); break; - case 11: name = "TestRegistration"; if (exec) TestRegistration(); break; - case 12: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; - case 13: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; - - default: name = ""; break; /*needed to end loop*/ +#else + case 0: case 1: case 2: case 3: case 4: name = "skip"; break; +#endif + case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; + case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; + case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; + case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; +#if !UCONFIG_NO_FILE_IO + case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; + case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; + case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; + case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; + case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; +#else + case 9: case 10: case 11: case 12: case 13: name = "skip"; break; +#endif + case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; + + default: name = ""; break; // needed to end loop } } @@ -845,7 +1240,7 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof if(gotoffset != expectedOffset) errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); if(start <= gotoffset){ - testString.extractBetween(start, gotoffset, selected); + testString.extractBetween(start, gotoffset, selected); } else{ testString.extractBetween(gotoffset, start, selected); @@ -856,4 +1251,18 @@ void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof logln(prettify("****selected \"" + selected + "\"")); } +//--------------------------------------------- +//RBBIWithProtectedFunctions class functions +//--------------------------------------------- + +RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) + : RuleBasedBreakIterator(data, status) +{ +} + +RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) + : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) +{ +} + #endif /* #if !UCONFIG_NO_BREAK_ITERATION */