+
+// Check getBinaryRules() and construction of a break iterator from those rules.
+//
+//   1. Create the English line break iterator and check that it finds the
+//      expected number of boundaries in a short string.
+//   2. Fetch its binary rules and copy them into a privately owned buffer.
+//   3. Construct a second iterator directly from the copy and check that it
+//      finds the same number of boundaries.
+
+void RBBIAPITest::TestGetBinaryRules() {
+    UErrorCode status=U_ZERO_ERROR;
+    LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
+    if (U_FAILURE(status)) {
+        dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
+        return;
+    }
+    // rbbi is a non-owning alias; bi keeps ownership of the iterator.
+    RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
+    if (rbbi == NULL) {
+        dataerrln("FAIL: RuleBasedBreakIterator is NULL");
+        return;
+    }
+
+    // Check that the new line break iterator is nominally functional.
+    UnicodeString helloWorld("Hello, World!");
+    rbbi->setText(helloWorld);
+    int n = 0;
+    while (bi->next() != UBRK_DONE) {
+        ++n;
+    }
+    TEST_ASSERT(n == 2);
+
+    // Extract the binary rules as a uint8_t blob.
+    uint32_t ruleLength = 0;
+    const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
+    TEST_ASSERT(ruleLength > 0);
+    TEST_ASSERT(binRules != NULL);
+    if (binRules == NULL || ruleLength == 0) {
+        // The failure has been reported above. Stop here rather than copy
+        // from a NULL or empty source (assumes TEST_ASSERT does not itself
+        // abort the test; the macro is defined outside this function).
+        return;
+    }
+
+    // Clone the binary rules, and create a break iterator from that.
+    // The break iterator does not adopt the rules, so the copy has to outlive it:
+    // the iterator lives in an inner scope and the buffer is released only after
+    // that scope has closed.
+    uint8_t *clonedRules = new uint8_t[ruleLength];
+    memcpy(clonedRules, binRules, ruleLength);
+    {
+        RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
+        TEST_ASSERT_SUCCESS(status);
+
+        // Check that the cloned line break iterator is nominally alive.
+        // Skipped when construction failed, since the iterator is then unusable.
+        if (U_SUCCESS(status)) {
+            clonedBI.setText(helloWorld);
+            n = 0;
+            while (clonedBI.next() != UBRK_DONE) {
+                ++n;
+            }
+            TEST_ASSERT(n == 2);
+        }
+    }
+
+    delete[] clonedRules;
+}
+
+
+void RBBIAPITest::TestRefreshInputText() {
+    /*
+     *  refreshInputText() replaces the text storage underneath a break iterator
+     *  while leaving the rest of the iterator's state untouched.  It is used with
+     *  Java JNI, when Java moves the underlying string storage.
+     *
+     *  The test calls next() a couple of times, relocates the text to a second
+     *  buffer (scrubbing the first one), and then keeps calling next().  The
+     *  remaining boundaries must still be the right ones.
+     */
+    UErrorCode status = U_ZERO_ERROR;
+    UChar testStr[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D" */
+    UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
+    UText origText  = UTEXT_INITIALIZER;
+    UText movedText = UTEXT_INITIALIZER;
+
+    RuleBasedBreakIterator *bi = static_cast<RuleBasedBreakIterator *>(
+            BreakIterator::createLineInstance(Locale::getEnglish(), status));
+    TEST_ASSERT_SUCCESS(status);
+
+    utext_openUChars(&origText, testStr, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    /* Nothing more can be checked if either setup step failed. */
+    if (U_FAILURE(status)) {
+        delete bi;
+        return;
+    }
+
+    bi->setText(&origText, status);
+    TEST_ASSERT_SUCCESS(status);
+
+    /* In the original string a line boundary falls before each letter. */
+    TEST_ASSERT(bi->next() == 1);
+    TEST_ASSERT(bi->next() == 3);
+
+    /* Relocate the text, then overwrite the original buffer with spaces. */
+    u_strcpy(movedStr, testStr);
+    u_memset(testStr, 0x20, u_strlen(testStr));
+    utext_openUChars(&movedText, movedStr, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    /* refreshInputText() must hand back the very iterator it was called on. */
+    RuleBasedBreakIterator &refreshed = bi->refreshInputText(&movedText, status);
+    TEST_ASSERT_SUCCESS(status);
+    TEST_ASSERT(bi == &refreshed);
+
+    /* Remaining boundaries, now read from the relocated string. */
+    TEST_ASSERT(bi->next() == 5);
+    TEST_ASSERT(bi->next() == 7);
+    TEST_ASSERT(bi->next() == 8);
+    TEST_ASSERT(bi->next() == UBRK_DONE);
+
+    utext_close(&origText);
+    utext_close(&movedText);
+
+    delete bi;
+}
+
+#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+/**
+ * Debug helper: log where a break iterator places its boundaries in a string.
+ *
+ * Walks brk forward with next() from its current position until UBRK_DONE and
+ * writes three lines through it.logln():
+ *   - the input string,
+ *   - the input string wrapped in lenticular brackets with a pilcrow inserted
+ *     at every boundary found,
+ *   - the boundary offsets as space-separated decimal numbers.
+ *
+ * @param brk   iterator to walk; the callers in this file call first() on it
+ *              beforehand. Its text is presumably ustr (set by the caller).
+ * @param ustr  the text being segmented; used for display and to size the
+ *              offset buffer.
+ * @param it    test object that receives the log and error output.
+ */
+static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
+    static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
+    it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
+
+    // One slot per code unit: walking forward from the start there can be at
+    // most ustr.length() boundaries (offsets 1..length).
+    const int32_t capacity = ustr.length();
+    int32_t *pos = new int32_t[capacity];
+    int32_t posCount = 0;
+
+    // calculate breaks up front, so we can print out
+    // sans any debugging
+    for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
+        // Check before storing so the buffer is never overrun (this also covers
+        // an empty ustr), and release the buffer on the error path.
+        if(posCount>=capacity) {
+            it.errln("brk count exceeds string length!");
+            delete [] pos;
+            return;
+        }
+        pos[posCount++] = n;
+    }
+
+    // Second line: the text with a pilcrow at each boundary.
+    UnicodeString out;
+    out.append((UChar)CHSTR);
+    int32_t prev = 0;
+    for(int32_t i=0;i<posCount;i++) {
+        int32_t n=pos[i];
+        out.append(ustr.tempSubString(prev,n-prev));
+        out.append((UChar)PILCROW);
+        prev=n;
+    }
+    out.append(ustr.tempSubString(prev,ustr.length()-prev));
+    out.append((UChar)CHEND);
+    it.logln(out);
+
+    // Third line: the raw offsets.
+    out.remove();
+    for(int32_t i=0;i<posCount;i++) {
+        char tmp[100];
+        sprintf(tmp,"%d ",pos[i]);
+        out.append(UnicodeString(tmp));
+    }
+    it.logln(out);
+    delete [] pos;
+}
+#endif
+
+// Exercise FilteredBreakIteratorBuilder: build sentence break iterators with
+// different sets of suppressed abbreviations and check the boundaries found.
+//
+// The sections below share one UErrorCode and it is never reset, so after the
+// first failure the later sections only report and skip.  Every dereference of
+// builder, filteredBI or frenchBI is guarded so that a failure (for example,
+// missing data) is reported instead of dereferencing a null pointer.
+void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
+#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<FilteredBreakIteratorBuilder> builder;
+    LocalPointer<BreakIterator> baseBI;
+    LocalPointer<BreakIterator> filteredBI;
+    LocalPointer<BreakIterator> frenchBI;
+
+    const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
+    const UnicodeString ABBR_MR("Mr.");
+    const UnicodeString ABBR_CAPT("Capt.");
+
+    // 1. Empty builder, nothing suppressed: expect a break after each "Mr." and "Capt.".
+    {
+        logln("Constructing empty builder\n");
+        builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
+        TEST_ASSERT_SUCCESS(status);
+
+        logln("Constructing base BI\n");
+        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        // Only touch the builder once both factory calls have succeeded.
+        if (U_SUCCESS(status)) {
+            logln("Building new BI\n");
+            filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            if (U_SUCCESS(status) && filteredBI.isValid()) {
+                logln("Testing:");
+                filteredBI->setText(text);
+                TEST_ASSERT(20 == filteredBI->next()); // Mr.
+                TEST_ASSERT(84 == filteredBI->next()); // recovered.
+                TEST_ASSERT(90 == filteredBI->next()); // Capt.
+                TEST_ASSERT(181 == filteredBI->next()); // Mr.
+                TEST_ASSERT(278 == filteredBI->next()); // charge.
+                filteredBI->first();
+                prtbrks(filteredBI.getAlias(), text, *this);
+            }
+        }
+    }
+
+    // 2. Empty builder with "Mr." suppressed; also checks the return values of
+    //    suppressBreakAfter()/unsuppressBreakAfter() for repeated calls.
+    {
+        logln("Constructing empty builder\n");
+        builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
+        TEST_ASSERT_SUCCESS(status);
+
+        if (U_SUCCESS(status)) {
+            logln("Adding Mr. as an exception\n");
+            TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
+            TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
+            TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
+            TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
+            TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
+            TEST_ASSERT_SUCCESS(status);
+
+            logln("Constructing base BI\n");
+            baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            logln("Building new BI\n");
+            filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            if (U_SUCCESS(status) && filteredBI.isValid()) {
+                logln("Testing:");
+                filteredBI->setText(text);
+                TEST_ASSERT(84 == filteredBI->next());
+                TEST_ASSERT(90 == filteredBI->next());// Capt.
+                TEST_ASSERT(278 == filteredBI->next());
+                filteredBI->first();
+                prtbrks(filteredBI.getAlias(), text, *this);
+            }
+        }
+    }
+
+
+    // 3. Empty builder with both "Mr." and "Capt." suppressed.
+    {
+        logln("Constructing empty builder\n");
+        builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
+        TEST_ASSERT_SUCCESS(status);
+
+        if (U_SUCCESS(status)) {
+            logln("Adding Mr. and Capt as an exception\n");
+            TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
+            TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
+            TEST_ASSERT_SUCCESS(status);
+
+            logln("Constructing base BI\n");
+            baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            logln("Building new BI\n");
+            filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            if (U_SUCCESS(status) && filteredBI.isValid()) {
+                logln("Testing:");
+                filteredBI->setText(text);
+                TEST_ASSERT(84 == filteredBI->next());
+                TEST_ASSERT(278 == filteredBI->next());
+                filteredBI->first();
+                prtbrks(filteredBI.getAlias(), text, *this);
+            }
+        }
+    }
+
+
+    // 4. English locale builder with "Capt." explicitly unsuppressed.
+    {
+        logln("Constructing English builder\n");
+        builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        logln("Constructing base BI\n");
+        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        if (U_SUCCESS(status)) {
+            logln("unsuppressing 'Capt'");
+            TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
+
+            logln("Building new BI\n");
+            filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            if (U_SUCCESS(status) && filteredBI.isValid()) {
+                logln("Testing:");
+                filteredBI->setText(text);
+                TEST_ASSERT(84 == filteredBI->next());
+                TEST_ASSERT(90 == filteredBI->next());
+                TEST_ASSERT(278 == filteredBI->next());
+                filteredBI->first();
+                prtbrks(filteredBI.getAlias(), text, *this);
+            }
+        }
+    }
+
+
+    // 5. English locale builder, used as created.
+    {
+        logln("Constructing English builder\n");
+        builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        logln("Constructing base BI\n");
+        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        if (U_SUCCESS(status)) {
+            logln("Building new BI\n");
+            filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+            TEST_ASSERT_SUCCESS(status);
+
+            if (U_SUCCESS(status) && filteredBI.isValid()) {
+                logln("Testing:");
+                filteredBI->setText(text);
+                TEST_ASSERT(84 == filteredBI->next());
+                TEST_ASSERT(278 == filteredBI->next());
+                filteredBI->first();
+                prtbrks(filteredBI.getAlias(), text, *this);
+            }
+        }
+    }
+
+    // 6. French locale builder, compared against the English filtered iterator
+    //    left over from the previous section.
+    {
+        logln("Constructing French builder");
+        builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        logln("Constructing base BI\n");
+        baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
+        TEST_ASSERT_SUCCESS(status);
+
+        if (U_SUCCESS(status)) {
+            logln("Building new BI\n");
+            frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
+            TEST_ASSERT_SUCCESS(status);
+        }
+
+        if(frenchBI.isValid()) {
+            logln("Testing:");
+            UnicodeString frText("C'est MM. Duval.");
+            frenchBI->setText(frText);
+            TEST_ASSERT(16 == frenchBI->next());
+            TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
+            frenchBI->first();
+            prtbrks(frenchBI.getAlias(), frText, *this);
+
+            // filteredBI may be null if an earlier section failed; never dereference it blindly.
+            const UBool haveEnglishBI = filteredBI.isValid();
+            if (haveEnglishBI) {
+                logln("Testing against English:");
+                filteredBI->setText(frText);
+                TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
+                TEST_ASSERT(16 == filteredBI->next());
+                TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
+                filteredBI->first();
+                prtbrks(filteredBI.getAlias(), frText, *this);
+            } else {
+                dataerrln("English filtered BI: not valid.");
+            }
+
+            // Verify ==
+            assertTrue(WHERE, *frenchBI == *frenchBI);
+            if (haveEnglishBI) {
+                assertTrue(WHERE, *filteredBI != *frenchBI);
+                assertTrue(WHERE, *frenchBI != *filteredBI);
+            }
+        } else {
+            dataerrln("French BI: not valid.");
+        }
+    }
+
+#else
+    logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
+#endif
+}
+