+#endif
+}
+
+
+// TestBug12519 - Correct handling of Locales by assignment / copy / clone
+
+void RBBITest::TestBug12519() {
+    // Checks that the locale reported by a break iterator survives clone(), and that
+    // assigning one iterator to another makes the two compare equal.
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<RuleBasedBreakIterator> biEn((RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status));
+    LocalPointer<RuleBasedBreakIterator> biFr((RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getFrance(), status));
+    if (!assertSuccess(WHERE, status)) {
+        dataerrln("%s %d status = %s", __FILE__, __LINE__, u_errorName(status));
+        return;
+    }
+    assertTrue(WHERE, Locale::getEnglish() == biEn->getLocale(ULOC_VALID_LOCALE, status));
+
+    // The iterator was requested for Locale::getFrance(); the valid locale it reports
+    // is expected to be Locale::getFrench().
+    assertTrue(WHERE, Locale::getFrench() == biFr->getLocale(ULOC_VALID_LOCALE, status));
+    assertTrue(WHERE "Locales do not participate in BreakIterator equality.", *biEn == *biFr);
+
+    // A clone must compare equal to its source and report the same valid locale.
+    LocalPointer<RuleBasedBreakIterator>cloneEn((RuleBasedBreakIterator *)biEn->clone());
+    assertTrue(WHERE, *biEn == *cloneEn);
+    assertTrue(WHERE, Locale::getEnglish() == cloneEn->getLocale(ULOC_VALID_LOCALE, status));
+
+    LocalPointer<RuleBasedBreakIterator>cloneFr((RuleBasedBreakIterator *)biFr->clone());
+    assertTrue(WHERE, *biFr == *cloneFr);
+    assertTrue(WHERE, Locale::getFrench() == cloneFr->getLocale(ULOC_VALID_LOCALE, status));
+
+    // A line (not word) iterator with text set, so it starts out different from biFr.
+    LocalPointer<RuleBasedBreakIterator>biDe((RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getGerman(), status));
+    // Stop here if the line iterator could not be created (or an earlier call left
+    // status in a failure state): biDe may then hold a null pointer, and the
+    // setText() call below would dereference it.
+    if (!assertSuccess(WHERE, status, true)) {
+        return;
+    }
+    UnicodeString text("Hallo Welt");
+    biDe->setText(text);
+    assertTrue(WHERE "before assignment of \"biDe = biFr\", they should be different, but are equal.", *biFr != *biDe);
+    *biDe = *biFr;
+    assertTrue(WHERE "after assignment of \"biDe = biFr\", they should be equal, but are not.", *biFr == *biDe);
+}
+
+void RBBITest::TestBug12677() {
+    // getRules() hands back the rule source with comments stripped. A '#' that
+    // appears inside a set or a quoted literal does not start a comment, and the
+    // stripping must not be fooled by it.
+    const UnicodeString ruleSource(u"!!forward; \n"
+                                   "$x = [ab#]; # a set with a # literal. \n"
+                                   " # .; # a comment that looks sort of like a rule. \n"
+                                   " '#' '?'; # a rule with a quoted # \n"
+                                  );
+    const UnicodeString expectedRules(u"!!forward; $x = [ab#]; '#' '?'; ");
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UParseError parseError;
+    RuleBasedBreakIterator bi(ruleSource, parseError, errorCode);
+    assertSuccess(WHERE, errorCode);
+
+    // Compare against a copy of the round-tripped rules.
+    UnicodeString roundTripped = bi.getRules();
+    assertEquals(WHERE, expectedRules, roundTripped);
+}
+
+
+void RBBITest::TestTableRedundancies() {
+    // Inspects the forward state table of the English line break iterator and
+    // reports an error for the first pair of identical character-category columns
+    // and for the first pair of identical state rows.
+    UErrorCode status = U_ZERO_ERROR;
+
+    BreakIterator *rawIter = BreakIterator::createLineInstance(Locale::getEnglish(), status);
+    LocalPointer<RuleBasedBreakIterator> bi(static_cast<RuleBasedBreakIterator *>(rawIter));
+    assertSuccess(WHERE, status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    RBBIDataWrapper *wrapper = bi->fData;
+    const RBBIStateTable *forwardTable = wrapper->fForwardTable;
+    const int32_t categoryCount = wrapper->fHeader->fCatCount;
+    const int32_t stateCount = (int32_t)forwardTable->fNumStates;
+
+    // Rows are stored back to back in fTableData, fRowLen apart.
+    auto stateRow = [forwardTable](int32_t state) {
+        return (RBBIStateTableRow *) (forwardTable->fTableData + (forwardTable->fRowLen * state));
+    };
+
+    // Build one string per column (character category), made of the next-state
+    // values of rows 1 and up, so that equal columns yield equal strings.
+    std::vector<UnicodeString> columns;
+    for (int32_t category = 0; category < categoryCount; ++category) {
+        UnicodeString signature;
+        for (int32_t state = 1; state < stateCount; ++state) {
+            signature.append(stateRow(state)->fNextState[category]);
+        }
+        columns.push_back(signature);
+    }
+
+    // Column (char class) 0 is special and may have duplicates, so start at 1.
+    // Only the first duplicate pair is reported; the row check below still runs.
+    bool duplicateColumnSeen = false;
+    for (int first = 1; first < categoryCount && !duplicateColumnSeen; ++first) {
+        for (int second = first + 1; second < categoryCount; ++second) {
+            if (columns.at(first) == columns.at(second)) {
+                errln("%s:%d Duplicate columns (%d, %d)\n", __FILE__, __LINE__, first, second);
+                duplicateColumnSeen = true;
+                break;
+            }
+        }
+    }
+
+    // Build one string per state row: accepting, look-ahead and tag fields followed
+    // by every next-state entry.
+    std::vector<UnicodeString> rows;
+    for (int32_t state = 0; state < stateCount; ++state) {
+        RBBIStateTableRow *row = stateRow(state);
+        UnicodeString signature;
+        assertTrue(WHERE, row->fAccepting >= -1);
+        signature.append(row->fAccepting + 1);    // values of -1 are expected.
+        signature.append(row->fLookAhead);
+        signature.append(row->fTagIdx);
+        for (int32_t category = 0; category < categoryCount; ++category) {
+            signature.append(row->fNextState[category]);
+        }
+        rows.push_back(signature);
+    }
+
+    // Report the first pair of identical rows, then stop.
+    for (int lhs = 0; lhs < stateCount; ++lhs) {
+        for (int rhs = lhs + 1; rhs < stateCount; ++rhs) {
+            if (rows.at(lhs) == rows.at(rhs)) {
+                errln("%s:%d Duplicate rows (%d, %d)\n", __FILE__, __LINE__, lhs, rhs);
+                return;
+            }
+        }
+    }
+}
+
+// Bug 13447: verify that getRuleStatus() returns the value corresponding to current(),
+// even after next() has returned DONE.
+
+void RBBITest::TestBug13447() {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    BreakIterator *rawIter = BreakIterator::createWordInstance(Locale::getEnglish(), errorCode);
+    LocalPointer<RuleBasedBreakIterator> wordIter(static_cast<RuleBasedBreakIterator *>(rawIter));
+    assertSuccess(WHERE, errorCode);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    UnicodeString digits(u"1234");
+    wordIter->setText(digits);
+
+    // Freshly set text: no word status yet.
+    assertEquals(WHERE, UBRK_WORD_NONE, wordIter->getRuleStatus());
+
+    // The first next() lands at offset 4 with a number status.
+    assertEquals(WHERE, 4, wordIter->next());
+    assertEquals(WHERE, UBRK_WORD_NUMBER, wordIter->getRuleStatus());
+
+    // A further next() returns DONE; current() and the rule status must still
+    // describe the boundary at offset 4.
+    assertEquals(WHERE, UBRK_DONE, wordIter->next());
+    assertEquals(WHERE, 4, wordIter->current());
+    assertEquals(WHERE, UBRK_WORD_NUMBER, wordIter->getRuleStatus());
+}
+
+// TestReverse exercises both the synthesized safe reverse rules and the logic
+// for filling the break iterator cache when starting from random positions
+// in the text.
+//
+// It's a monkey test, working on random data, with the expected data obtained
+// from forward iteration (no safe rules involved), comparing with results
+// when indexing into the interior of the string (safe rules needed).
+
+void RBBITest::TestReverse() {
+    // Runs the parameterized TestReverse() once for each of the English character,
+    // word, line and sentence break iterators, in that order. The creation status
+    // is checked after each run and reset before the next one.
+    typedef std::unique_ptr<RuleBasedBreakIterator> IterPtr;
+    UErrorCode status = U_ZERO_ERROR;
+    BreakIterator *rawIter = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
+    TestReverse(IterPtr(static_cast<RuleBasedBreakIterator *>(rawIter)));
+    assertSuccess(WHERE, status, true);
+
+    status = U_ZERO_ERROR;
+    rawIter = BreakIterator::createWordInstance(Locale::getEnglish(), status);
+    TestReverse(IterPtr(static_cast<RuleBasedBreakIterator *>(rawIter)));
+    assertSuccess(WHERE, status, true);
+
+    status = U_ZERO_ERROR;
+    rawIter = BreakIterator::createLineInstance(Locale::getEnglish(), status);
+    TestReverse(IterPtr(static_cast<RuleBasedBreakIterator *>(rawIter)));
+    assertSuccess(WHERE, status, true);
+
+    status = U_ZERO_ERROR;
+    rawIter = BreakIterator::createSentenceInstance(Locale::getEnglish(), status);
+    TestReverse(IterPtr(static_cast<RuleBasedBreakIterator *>(rawIter)));
+    assertSuccess(WHERE, status, true);
+}
+
+void RBBITest::TestReverse(std::unique_ptr<RuleBasedBreakIterator>bi) {
+ if (!bi) {
+ return;
+ }
+
+ // From the mapping trie in the break iterator's internal data, create a
+ // vector of UnicodeStrings, one for each character category, containing
+ // all of the code points that map to that category. Unicode planes 0 and 1 only,
+ // to avoid an excess of unassigned code points.
+
+ RBBIDataWrapper *data = bi->fData;
+ int32_t categoryCount = data->fHeader->fCatCount;
+ UTrie2 *trie = data->fTrie;
+
+ std::vector<UnicodeString> strings(categoryCount, UnicodeString());
+ for (int cp=0; cp<0x1fff0; ++cp) {
+ int cat = utrie2_get32(trie, cp);
+ cat &= ~0x4000; // And off the dictionary bit from the category.
+ assertTrue(WHERE, cat < categoryCount && cat >= 0);
+ if (cat < 0 || cat >= categoryCount) return;
+ strings[cat].append(cp);
+ }
+
+ icu_rand randomGen;
+ const int testStringLength = 10000;
+ UnicodeString testString;
+
+ for (int i=0; i<testStringLength; ++i) {
+ int charClass = randomGen() % categoryCount;
+ if (strings[charClass].length() > 0) {
+ int cp = strings[charClass].char32At(randomGen() % strings[charClass].length());
+ testString.append(cp);
+ }
+ }
+
+ typedef std::pair<UBool, int32_t> Result;
+ std::vector<Result> expectedResults;
+ bi->setText(testString);
+ for (int i=0; i<testString.length(); ++i) {
+ bool isboundary = bi->isBoundary(i);
+ int ruleStatus = bi->getRuleStatus();
+ expectedResults.push_back(std::make_pair(isboundary, ruleStatus));
+ }
+
+ for (int i=testString.length()-1; i>=0; --i) {
+ bi->setText(testString); // clears the internal break cache
+ Result expected = expectedResults[i];
+ assertEquals(WHERE, expected.first, bi->isBoundary(i));
+ assertEquals(WHERE, expected.second, bi->getRuleStatus());
+ }