/********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1999-2003, International Business Machines Corporation and
- * others. All Rights Reserved.
+ * Copyright (c) 1999-2011, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ ********************************************************************
+ * Date Name Description
+ * 12/14/99 Madhu Creation.
+ * 01/12/2000 Madhu updated for changed API
********************************************************************/
-/************************************************************************
-* Date Name Description
-* 12/14/99 Madhu Creation.
-* 01/12/2000 Madhu updated for changed API
-************************************************************************/
#include "unicode/utypes.h"
#include "rbbiapts.h"
#include "rbbidata.h"
#include "cstring.h"
+#include "ubrkimpl.h"
+#include "unicode/locid.h"
+#include "unicode/ustring.h"
+#include "unicode/utext.h"
+#include "cmemory.h"
/**
* API Test the RuleBasedBreakIterator class
*/
+#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
+dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}  // TEST_ASSERT_SUCCESS: when status holds a failure code, report it through dataerrln() with the call site's file and line; the macro does not modify status.
+
+#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
+ errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}  // TEST_ASSERT: when expr compares equal to FALSE, report through errln() with file, line and the stringified text of expr; execution continues afterwards.
void RBBIAPITest::TestCloneEquals()
{
RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
if(U_FAILURE(status)){
- errln((UnicodeString)"FAIL : in construction");
+ errcheckln(status, "Fail : in construction - %s", u_errorName(status));
return;
}
errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
- // Quick test of RulesBasedBreakIterator assignment -
+ // Quick test of RulesBasedBreakIterator assignment -
// Check that
// two different iterators are !=
// they are == after assignment
// source and dest iterator produce the same next() after assignment.
// deleting one doesn't disable the other.
logln("Testing assignment");
- RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
+ RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
if(U_FAILURE(status)){
- errln((UnicodeString)"FAIL : in construction");
+ errcheckln(status, "Fail : in construction - %s", u_errorName(status));
return;
}
RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
- if(*bi1clone != *bi1 || *bi1clone != *biequal ||
+ if(*bi1clone != *bi1 || *bi1clone != *biequal ||
*bi1clone == *bi3 || *bi1clone == *bi2)
errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
- if(*bi2clone == *bi1 || *bi2clone == *biequal ||
+ if(*bi2clone == *bi1 || *bi2clone == *biequal ||
*bi2clone == *bi3 || *bi2clone != *bi2)
errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
if(bi1->getText() != bi1clone->getText() ||
- bi2clone->getText() != bi2->getText() ||
+ bi2clone->getText() != bi2->getText() ||
*bi2clone == *bi1clone )
errln((UnicodeString)"ERROR: RBBI's clone() method failed");
void RBBIAPITest::TestBoilerPlate()
{
UErrorCode status = U_ZERO_ERROR;
- BreakIterator* a = BreakIterator::createLineInstance(Locale("hi"), status);
- BreakIterator* b = BreakIterator::createLineInstance(Locale("hi_IN"),status);
+ BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
+ BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
+ if (U_FAILURE(status)) {
+ errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
+ return;
+ }
if(*a!=*b){
errln("Failed: boilerplate method operator!= does not return correct results");
}
- BreakIterator* c = BreakIterator::createLineInstance(Locale("th"),status);
- if(*c==*a){
- errln("Failed: boilerplate method opertator== does not return correct results");
+ BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
+ if(a && c){
+ if(*c==*a){
+ errln("Failed: boilerplate method opertator== does not return correct results");
+ }
+ }else{
+ errln("creation of break iterator failed");
}
delete a;
delete b;
RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
if(U_FAILURE(status)){
- errln((UnicodeString)"FAIL: in construction");
+ errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
delete bi1;
delete bi2;
return;
RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
if(U_FAILURE(status)){
- errln((UnicodeString)"FAIL : in construction");
+ errcheckln(status, "Fail : in construction - %s", u_errorName(status));
delete bi1;
delete bi2;
delete bi3;
errln((UnicodeString)"ERROR: different objects have same hashcodes");
delete bi1clone;
- delete bi2clone;
+ delete bi2clone;
delete bi1;
delete bi2;
delete bi3;
void RBBIAPITest::TestGetSetAdoptText()
{
logln((UnicodeString)"Testing getText setText ");
- UErrorCode status=U_ZERO_ERROR;
+ IcuTestErrorCode status(*this, "TestGetSetAdoptText");
UnicodeString str1="first string.";
UnicodeString str2="Second string.";
- RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
- RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
- if(U_FAILURE(status)){
- errln((UnicodeString)"FAIL : in construction");
+ LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
+ LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
+ if(status.isFailure()){
+ errcheckln(status, "Fail : in construction - %s", status.errorName());
return;
}
CharacterIterator* text1Clone = text1->clone();
CharacterIterator* text2= new StringCharacterIterator(str2);
CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
-
+
wordIter1->setText(str1);
- if(wordIter1->getText() != *text1)
- errln((UnicodeString)"ERROR:1 error in setText or getText ");
+ CharacterIterator *tci = &wordIter1->getText();
+ UnicodeString tstr;
+ tci->getText(tstr);
+ TEST_ASSERT(tstr == str1);
if(wordIter1->current() != 0)
errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
charIter1->adoptText(text1Clone);
- if( wordIter1->getText() == charIter1->getText() ||
- wordIter1->getText() != *text2 || charIter1->getText() != *text1 )
- errln((UnicodeString)"ERROR:2 error is getText or setText()");
+ TEST_ASSERT(wordIter1->getText() != charIter1->getText());
+ tci = &wordIter1->getText();
+ tci->getText(tstr);
+ TEST_ASSERT(tstr == str2);
+ tci = &charIter1->getText();
+ tci->getText(tstr);
+ TEST_ASSERT(tstr == str1);
- RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
+
+ LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
rb->adoptText(text1);
if(rb->getText() != *text1)
errln((UnicodeString)"ERROR:1 error in adoptText ");
errln((UnicodeString)"ERROR:2 error in adoptText ");
// Adopt where iterator range is less than the entire orignal source string.
+ // (With the change of the break engine to working with UText internally,
+ // CharacterIterators starting at positions other than zero are not supported)
rb->adoptText(text3);
- if(rb->preceding(2) != 3) {
- errln((UnicodeString)"ERROR:3 error in adoptText ");
- }
- if(rb->following(11) != BreakIterator::DONE) {
- errln((UnicodeString)"ERROR:4 error in adoptText ");
- }
-
- delete wordIter1;
- delete charIter1;
- delete rb;
+ TEST_ASSERT(rb->preceding(2) == 0);
+ TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
+ //if(rb->preceding(2) != 3) {
+ // errln((UnicodeString)"ERROR:3 error in adoptText ");
+ //}
+ //if(rb->following(11) != BreakIterator::DONE) {
+ // errln((UnicodeString)"ERROR:4 error in adoptText ");
+ //}
+
+ // UText API
+ //
+ // Quick test to see if UText is working at all.
+ //
+ const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
+ const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
+ // 012345678901
+
+ status.reset();
+ LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
+ wordIter1->setText(ut.getAlias(), status);
+ TEST_ASSERT_SUCCESS(status);
+
+ int32_t pos;
+ pos = wordIter1->first();
+ TEST_ASSERT(pos==0);
+ pos = wordIter1->next();
+ TEST_ASSERT(pos==5);
+ pos = wordIter1->next();
+ TEST_ASSERT(pos==6);
+ pos = wordIter1->next();
+ TEST_ASSERT(pos==11);
+ pos = wordIter1->next();
+ TEST_ASSERT(pos==UBRK_DONE);
+
+ status.reset();
+ LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
+ TEST_ASSERT_SUCCESS(status);
+ wordIter1->setText(ut2.getAlias(), status);
+ TEST_ASSERT_SUCCESS(status);
+
+ pos = wordIter1->first();
+ TEST_ASSERT(pos==0);
+ pos = wordIter1->next();
+ TEST_ASSERT(pos==3);
+ pos = wordIter1->next();
+ TEST_ASSERT(pos==4);
+
+ pos = wordIter1->last();
+ TEST_ASSERT(pos==6);
+ pos = wordIter1->previous();
+ TEST_ASSERT(pos==4);
+ pos = wordIter1->previous();
+ TEST_ASSERT(pos==3);
+ pos = wordIter1->previous();
+ TEST_ASSERT(pos==0);
+ pos = wordIter1->previous();
+ TEST_ASSERT(pos==UBRK_DONE);
+
+ status.reset();
+ UnicodeString sEmpty;
+ LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
+ wordIter1->getUText(gut2.getAlias(), status);
+ TEST_ASSERT_SUCCESS(status);
+ status.reset();
+}
- }
-
void RBBIAPITest::TestIteration()
{
// This test just verifies that the API is present.
UErrorCode status=U_ZERO_ERROR;
RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
- errln("Failure creating character break iterator. Status = %s", u_errorName(status));
+ errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
}
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
- errln("Failure creating Word break iterator. Status = %s", u_errorName(status));
+ errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
}
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
- errln("Failure creating Line break iterator. Status = %s", u_errorName(status));
+ errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
}
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
- errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status));
+ errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
}
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
- errln("Failure creating Title break iterator. Status = %s", u_errorName(status));
+ errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
}
delete bi;
status=U_ZERO_ERROR;
bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
if (U_FAILURE(status) || bi == NULL) {
- errln("Failure creating character break iterator. Status = %s", u_errorName(status));
+ errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
return; // Skip the rest of these tests.
}
int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
-
+
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
- errln("FAIL : in construction");
+ dataerrln("Fail : in construction - %s", u_errorName(status));
} else {
bi->setText(testString1);
doBoundaryTest(*bi, testString1, bounds1);
int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
-
+
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
- errln("FAIL : in construction");
+ dataerrln("Fail : in construction - %s", u_errorName(status));
} else {
bi->setText(testString1);
doBoundaryTest(*bi, testString1, bounds1);
}
//
-// TestWordStatus
+// TestRuleStatus
// Test word break rule status constants.
//
-void RBBIAPITest::TestWordStatus() {
-
-
- UnicodeString testString1 = // Ideographic Katakana Hiragana
- CharsToUnicodeString("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094");
- // 012345678901234567 8 9 0 1 2 3 4 5 6
- int32_t bounds1[] = { 0, 5,6, 10,11, 17,18, 19, 20,21, 23,24, 25, 26};
+void RBBIAPITest::TestRuleStatus() {
+ UChar str[30];
+ u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
+ // 012345678901234567 8 9 0 1 2 3 4 5 6
+ // Ideographic Katakana Hiragana
+ str, 30);
+ UnicodeString testString1(str);
+ int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
UErrorCode status=U_ZERO_ERROR;
-
- RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getDefault(), status);
+
+ RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
if(U_FAILURE(status)) {
- errln("FAIL : in construction");
+ errcheckln(status, "Fail : in construction - %s", u_errorName(status));
} else {
bi->setText(testString1);
// First test that the breaks are in the right spots.
errln("FAIL: incorrect tag value %d at position %d", tag, pos);
break;
}
+
+ // Check that we get the same tag values from getRuleStatusVec()
+ int32_t vec[10];
+ int t = bi->getRuleStatusVec(vec, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(t==1);
+ TEST_ASSERT(vec[0] == tag);
+ }
+ }
+ delete bi;
+
+ // Now test line break status. This test mostly is to confirm that the status constants
+ // are correctly declared in the header.
+ testString1 = "test line. \n";
+ // break type s s h
+
+ bi = (RuleBasedBreakIterator *)
+ BreakIterator::createLineInstance(Locale::getEnglish(), status);
+ if(U_FAILURE(status)) {
+ errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
+ } else {
+ int32_t i = 0;
+ int32_t pos, tag;
+ UBool success;
+
+ bi->setText(testString1);
+ pos = bi->current();
+ tag = bi->getRuleStatus();
+ for (i=0; i<3; i++) {
+ switch (i) {
+ case 0:
+ success = pos==0 && tag==UBRK_LINE_SOFT; break;
+ case 1:
+ success = pos==5 && tag==UBRK_LINE_SOFT; break;
+ case 2:
+ success = pos==12 && tag==UBRK_LINE_HARD; break;
+ default:
+ success = FALSE; break;
+ }
+ if (success == FALSE) {
+ errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
+ i, pos, tag);
+ break;
+ }
+ pos = bi->next();
+ tag = bi->getRuleStatus();
+ }
+ if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
+ UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
+ (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
+ errln("UBRK_LINE_* constants from header are inconsistent.");
}
}
delete bi;
+
}
+//
+// TestRuleStatusVec
+// Test the vector form of break rule status: build an iterator from custom rules whose {nnn} tags are 100..500, walk the text "Aapz5?" one boundary at a time, and check what getRuleStatusVec() returns at each boundary.
+// The last part checks the buffer-overflow path: capacities 0 and 1 are expected to set U_BUFFER_OVERFLOW_ERROR while still returning the full count (2).
+void RBBIAPITest::TestRuleStatusVec() {
+ UnicodeString rulesString( "[A-N]{100}; \n"  // tag 100: A-N
+ "[a-w]{200}; \n"  // tag 200: a-w
+ "[\\p{L}]{300}; \n"  // tag 300: any letter
+ "[\\p{N}]{400}; \n"  // tag 400: any number
+ "[0-5]{500}; \n"  // tag 500: digits 0-5
+ "!.*;\n", -1, US_INV);
+ UnicodeString testString1 = "Aapz5?";
+ int32_t statusVals[10];
+ int32_t numStatuses;
+ int32_t pos;
+
+ UErrorCode status=U_ZERO_ERROR;
+ UParseError parseError;
+
+ RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
+ if (U_FAILURE(status)) {
+ dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));  // rule construction failed; the checks below are skipped
+ } else {
+ bi->setText(testString1);
+
+ // A - expected to report two tags: 100 (A-N) and 300 (letter)
+ pos = bi->next();
+ TEST_ASSERT(pos==1);
+ numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == 100);
+ TEST_ASSERT(statusVals[1] == 300);
+
+ // a - expected to report two tags: 200 (a-w) and 300 (letter)
+ pos = bi->next();
+ TEST_ASSERT(pos==2);
+ numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == 200);
+ TEST_ASSERT(statusVals[1] == 300);
+
+ // p - same expectation as 'a': tags 200 and 300
+ pos = bi->next();
+ TEST_ASSERT(pos==3);
+ numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == 200);
+ TEST_ASSERT(statusVals[1] == 300);
+
+ // z - outside a-w, so only the letter tag 300 is expected
+ pos = bi->next();
+ TEST_ASSERT(pos==4);
+ numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 1);
+ TEST_ASSERT(statusVals[0] == 300);
+
+ // 5 - expected to report two tags: 400 (number) and 500 (0-5)
+ pos = bi->next();
+ TEST_ASSERT(pos==5);
+ numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == 400);
+ TEST_ASSERT(statusVals[1] == 500);
+
+ // ? - matches none of the tagged rules; a single status value of 0 is expected
+ pos = bi->next();
+ TEST_ASSERT(pos==6);
+ numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 1);
+ TEST_ASSERT(statusVals[0] == 0);
+
+ //
+ // Check buffer overflow error handling. Char == A
+ // The buffer is refilled with -1 before each call so the assertions can tell which slots were written.
+ bi->first();
+ pos = bi->next();
+ TEST_ASSERT(pos==1);
+ memset(statusVals, -1, sizeof(statusVals));
+ numStatuses = bi->getRuleStatusVec(statusVals, 0, status);  // capacity 0
+ TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == -1);  // nothing may be written with capacity 0
+
+ status = U_ZERO_ERROR;  // clear the overflow status set by the previous call
+ memset(statusVals, -1, sizeof(statusVals));
+ numStatuses = bi->getRuleStatusVec(statusVals, 1, status);  // capacity 1: room for only the first of two values
+ TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == 100);
+ TEST_ASSERT(statusVals[1] == -1);
+
+ status = U_ZERO_ERROR;
+ memset(statusVals, -1, sizeof(statusVals));
+ numStatuses = bi->getRuleStatusVec(statusVals, 2, status);  // capacity 2: exactly fits, so success is expected
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(numStatuses == 2);
+ TEST_ASSERT(statusVals[0] == 100);
+ TEST_ASSERT(statusVals[1] == 300);
+ TEST_ASSERT(statusVals[2] == -1);  // the slot past the two results must stay untouched
+ }
+ delete bi;
+
+}
+
//
// Bug 2190 Regression test. Builder crash on rule consisting of only a
// $variable reference
int32_t bounds1[] = {0, 4, 8};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
-
+
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
- errln("FAIL : in construction");
+ dataerrln("Fail : in construction - %s", u_errorName(status));
} else {
bi->setText(testString1);
doBoundaryTest(*bi, testString1, bounds1);
void RBBIAPITest::TestRegistration() {
+#if !UCONFIG_NO_SERVICE
UErrorCode status = U_ZERO_ERROR;
- BreakIterator* thai_word = BreakIterator::createWordInstance("th_TH", status);
-
+ BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
+
// ok to not delete these if we exit because of error?
- BreakIterator* thai_char = BreakIterator::createCharacterInstance("th_TH", status);
+ BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
BreakIterator* root_word = BreakIterator::createWordInstance("", status);
BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
- URegistryKey key = BreakIterator::registerInstance(thai_word, "xx", UBRK_WORD, status);
+ if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
+ dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
+ delete ja_word;
+ delete ja_char;
+ delete root_word;
+ delete root_char;
+
+ return;
+ }
+
+ URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
{
- if (*thai_word == *root_word) {
- errln("thai not different from root");
+ if (ja_word && *ja_word == *root_word) {
+ errln("japan not different from root");
}
}
-
+
{
BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
- UBool fail = *result != *thai_word;
+ UBool fail = TRUE;
+ if(result){
+ fail = *result != *ja_word;
+ }
delete result;
if (fail) {
errln("bad result for xx_XX/word");
}
}
-
+
{
- BreakIterator* result = BreakIterator::createCharacterInstance("th_TH", status);
- UBool fail = *result != *thai_char;
+ BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
+ UBool fail = TRUE;
+ if(result){
+ fail = *result != *ja_char;
+ }
delete result;
if (fail) {
- errln("bad result for th_TH/char");
+ errln("bad result for ja_JP/char");
}
}
-
+
{
BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
- UBool fail = *result != *root_char;
+ UBool fail = TRUE;
+ if(result){
+ fail = *result != *root_char;
+ }
delete result;
if (fail) {
errln("bad result for xx_XX/char");
}
}
-
+
{
StringEnumeration* avail = BreakIterator::getAvailableLocales();
UBool found = FALSE;
errln("did not find test locale");
}
}
-
+
{
UBool unreg = BreakIterator::unregister(key, status);
if (!unreg) {
errln("unable to unregister");
}
}
-
+
{
- BreakIterator* result = BreakIterator::createWordInstance("xx", status);
+ BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
BreakIterator* root = BreakIterator::createWordInstance("", status);
- UBool fail = *root != *result;
+ UBool fail = TRUE;
+ if(root){
+ fail = *root != *result;
+ }
delete root;
delete result;
if (fail) {
errln("did not get root break");
}
}
-
+
{
StringEnumeration* avail = BreakIterator::getAvailableLocales();
UBool found = FALSE;
errln("found test locale");
}
}
-
+
{
int32_t count;
UBool foundLocale = FALSE;
errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
}
}
-
-
- // that_word was adopted by factory
- delete thai_char;
+
+
+ // ja_word was adopted by factory
+ delete ja_char;
delete root_word;
delete root_char;
+#endif
}
void RBBIAPITest::RoundtripRule(const char *dataFile) {
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
- parseError.line = 0;
- parseError.offset = 0;
- UDataMemory *data = udata_open(NULL, "brk", dataFile, &status);
+ parseError.line = 0;
+ parseError.offset = 0;
+ LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
uint32_t length;
const UChar *builtSource;
const uint8_t *rbbiRules;
const uint8_t *builtRules;
if (U_FAILURE(status)) {
- errln("Can't open \"%s\"", dataFile);
+ errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
return;
}
- builtRules = (const uint8_t *)udata_getMemory(data);
+ builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
if (U_FAILURE(status)) {
return;
}
delete brkItr;
- udata_close(data);
}
void RBBIAPITest::TestRoundtripRules() {
RoundtripRule("line");
RoundtripRule("char");
if (!quick) {
- RoundtripRule("word_th");
- RoundtripRule("line_th");
+ RoundtripRule("word_ja");
+ RoundtripRule("word_POSIX");
}
}
+// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
+// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
+// This is just a sanity check, not a thorough test (e.g. we don't check that the
+// first delete actually frees rulesCopy).
+// Afterwards, round-trip an iterator through getBinaryRules() and the constructor
+// that takes a binary rules pointer plus a length, and check that a truncated
+// length is rejected with U_ILLEGAL_ARGUMENT_ERROR.
+void RBBIAPITest::TestCreateFromRBBIData() {
+    // Get some handy RBBIData
+    const char *brkName = "word"; // or "sent", "line", "char", etc.
+    UErrorCode status = U_ZERO_ERROR;
+    LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
+    if ( U_SUCCESS(status) ) {
+        const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
+        uint32_t length = builtRules->fLength;
+        RBBIWithProtectedFunctions * brkItr;
+
+        // Try the memory-adopting constructor, need to copy the data first
+        RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
+        if ( rulesCopy ) {
+            uprv_memcpy( rulesCopy, builtRules, length );
+
+            brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
+            if ( U_SUCCESS(status) ) {
+                delete brkItr; // this should free rulesCopy
+            } else {
+                errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
+                status = U_ZERO_ERROR;// reset for the next test
+                uprv_free( rulesCopy );
+            }
+        }
+
+        // Now try the non-adopting constructor
+        brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
+        if ( U_SUCCESS(status) ) {
+            delete brkItr; // this should NOT attempt to free builtRules
+            if (builtRules->fLength != length) { // sanity check
+                errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
+            }
+        } else {
+            errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
+        }
+    } else {
+        // Previously a failed udata_open() was skipped silently, so the test
+        // "passed" without exercising anything. Report it the same way
+        // RoundtripRule() does.
+        errcheckln(status, "Can't open \"%s\" - %s", brkName, u_errorName(status));
+    }
+
+    // getBinaryRules() and RuleBasedBreakIterator(const uint8_t *binaryRules, uint32_t length, ...)
+    //
+    status = U_ZERO_ERROR;
+    RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
+    if (rb == NULL || U_FAILURE(status)) {
+        dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
+    } else {
+        uint32_t length;
+        const uint8_t *rules = rb->getBinaryRules(length);
+        RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(*rb == *rb2);
+        UnicodeString words = "one two three ";
+        rb2->setText(words);
+        int wordCounter = 0;
+        while (rb2->next() != UBRK_DONE) {
+            wordCounter++;
+        }
+        TEST_ASSERT(wordCounter == 6);
+
+        // A length one byte short of the real size must be rejected.
+        status = U_ZERO_ERROR;
+        RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
+        TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
+
+        // Release the iterators built on rb's binary rules before rb itself
+        // (NOTE(review): the binary-rules constructor is understood not to copy
+        // the rules - confirm against the API documentation).
+        delete rb2;
+        delete rb3;
+    }
+    // Deleting here (rather than only in the success branch) also releases rb
+    // when creation returned an object together with a failure status.
+    delete rb;
+}
+
+
+void RBBIAPITest::TestRefreshInputText() {
+ /*
+ * RefreshInput changes out the input of a Break Iterator without
+ * changing anything else in the iterator's state. Used with Java JNI,
+ * when Java moves the underlying string storage. This test
+ * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
+ * The right set of boundaries should still be found.
+ *
+ * Sequence used here: iterate twice over testStr (boundaries 1 and 3 expected),
+ * copy testStr into movedStr and overwrite testStr with spaces, call
+ * refreshInputText() with a UText opened on movedStr, then continue iterating
+ * (boundaries 5, 7, 8 and finally UBRK_DONE expected).
+ */
+ UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
+ UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};  /* eight spaces; same length as testStr so u_strcpy() below fits */
+ UErrorCode status = U_ZERO_ERROR;
+ UText ut1 = UTEXT_INITIALIZER;
+ UText ut2 = UTEXT_INITIALIZER;
+ RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
+ TEST_ASSERT_SUCCESS(status);
+
+ utext_openUChars(&ut1, testStr, -1, &status);
+ TEST_ASSERT_SUCCESS(status);
+
+ if (U_SUCCESS(status)) {  // skip the body when the iterator or the first UText could not be set up
+ bi->setText(&ut1, status);
+ TEST_ASSERT_SUCCESS(status);
+
+ /* Line boundaries will occur before each letter in the original string */
+ TEST_ASSERT(1 == bi->next());
+ TEST_ASSERT(3 == bi->next());
+
+ /* Move the string, kill the original string. */
+ u_strcpy(movedStr, testStr);
+ u_memset(testStr, 0x20, u_strlen(testStr));  /* original buffer now holds only spaces - presumably so a stale read of it would show up as wrong boundaries */
+ utext_openUChars(&ut2, movedStr, -1, &status);
+ TEST_ASSERT_SUCCESS(status);
+ RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
+ TEST_ASSERT_SUCCESS(status);
+ TEST_ASSERT(bi == returnedBI);  /* refreshInputText() is expected to return the iterator it was called on */
+
+ /* Find the following matches, now working in the moved string. */
+ TEST_ASSERT(5 == bi->next());
+ TEST_ASSERT(7 == bi->next());
+ TEST_ASSERT(8 == bi->next());
+ TEST_ASSERT(UBRK_DONE == bi->next());
+
+ utext_close(&ut1);  /* both UText structs are locals of this function and are closed only on this path */
+ utext_close(&ut2);
+ }
+ delete bi;
+
+}
+
+
//---------------------------------------------
// runIndexedTest
//---------------------------------------------
if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
switch (index) {
// case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
+#if !UCONFIG_NO_FILE_IO
case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
case 4: name = "TestIteration"; if (exec) TestIteration(); break;
- case 5: name = "extra"; break; /* Extra */
- case 6: name = "extra"; break; /* Extra */
- case 7: name = "TestBuilder"; if (exec) TestBuilder(); break;
- case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
- case 9: name = "TestWordStatus"; if (exec) TestWordStatus(); break;
- case 10: name = "TestBug2190"; if (exec) TestBug2190(); break;
- case 11: name = "TestRegistration"; if (exec) TestRegistration(); break;
- case 12: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
- case 13: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
-
- default: name = ""; break; /*needed to end loop*/
+#else
+ case 0: case 1: case 2: case 3: case 4: name = "skip"; break;
+#endif
+ case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
+ case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
+ case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
+ case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;
+#if !UCONFIG_NO_FILE_IO
+ case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;
+ case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
+ case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
+ case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
+ case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
+#else
+ case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
+#endif
+ case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
+
+ default: name = ""; break; // needed to end loop
}
}
if(gotoffset != expectedOffset)
errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
if(start <= gotoffset){
- testString.extractBetween(start, gotoffset, selected);
+ testString.extractBetween(start, gotoffset, selected);
}
else{
testString.extractBetween(gotoffset, start, selected);
logln(prettify("****selected \"" + selected + "\""));
}
+//---------------------------------------------
+//RBBIWithProtectedFunctions class functions: both constructors have empty bodies and only forward their arguments to a RuleBasedBreakIterator base-class constructor.
+//---------------------------------------------
+
+RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
+ : RuleBasedBreakIterator(data, status)  // forwards data and status unchanged (the form the test calls "memory-adopting")
+{
+}
+
+RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
+ : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)  // the unnamed EDontAdopt argument only selects this overload; kDontAdopt is always what gets passed on
+{
+}
+
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */