+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
- * Copyright (c) 1999-2014, International Business Machines
+ * Copyright (c) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
* Date Name Description
#include "unicode/ustring.h"
#include "unicode/utext.h"
#include "cmemory.h"
#include "unicode/filteredbrk.h"
#include <stdio.h> // for sprintf
const uint8_t *builtRules;
if (U_FAILURE(status)) {
- errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
+ errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
if (U_FAILURE(status)) {
- errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
- u_errorName(status), parseError.line, parseError.offset);
+ errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
+ __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
+ errln(UnicodeString(builtSource));
rbbiRules = brkItr->getBinaryRules(length);
logln("Comparing \"%s\" len=%d", dataFile, length);
if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
- errln("Built rules and rebuilt rules are different %s", dataFile);
+ errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
delete brkItr;
-// Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
-// (these are protected so we access them via a local class RBBIWithProtectedFunctions).
-// This is just a sanity check, not a thorough test (e.g. we don't check that the
-// first delete actually frees rulesCopy).
-void RBBIAPITest::TestCreateFromRBBIData() {
- // Get some handy RBBIData
- const char *brkName = "word"; // or "sent", "line", "char", etc.
- UErrorCode status = U_ZERO_ERROR;
- LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
- if ( U_SUCCESS(status) ) {
- const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
- uint32_t length = builtRules->fLength;
- RBBIWithProtectedFunctions * brkItr;
- // Try the memory-adopting constructor, need to copy the data first
- RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
- if ( rulesCopy ) {
- uprv_memcpy( rulesCopy, builtRules, length );
- brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
- if ( U_SUCCESS(status) ) {
- delete brkItr; // this should free rulesCopy
- } else {
- errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
- status = U_ZERO_ERROR;// reset for the next test
- uprv_free( rulesCopy );
- }
- }
- // Now try the non-adopting constructor
- brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
- if ( U_SUCCESS(status) ) {
- delete brkItr; // this should NOT attempt to free builtRules
- if (builtRules->fLength != length) { // sanity check
- errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
- }
- } else {
- errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
- }
+// Check getBinaryRules() and construction of a break iterator from a caller-owned copy of those rules.
+void RBBIAPITest::TestGetBinaryRules() {
+ UErrorCode status=U_ZERO_ERROR;
+ LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
+ if (U_FAILURE(status)) {
+ dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
+ return;
+ }
+ RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
+ if (rbbi == NULL) {
+ dataerrln("FAIL: RuleBasedBreakIterator is NULL");
+ return;
- // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
- //
- status = U_ZERO_ERROR;
- RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
- if (rb == NULL || U_FAILURE(status)) {
- dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
- } else {
- uint32_t length;
- const uint8_t *rules = rb->getBinaryRules(length);
- RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
- TEST_ASSERT(*rb == *rb2);
- UnicodeString words = "one two three ";
- rb2->setText(words);
- int wordCounter = 0;
- while (rb2->next() != UBRK_DONE) {
- wordCounter++;
- }
- TEST_ASSERT(wordCounter == 6);
+ // Check that the new line break iterator is nominally functional.
+ UnicodeString helloWorld("Hello, World!");
+ rbbi->setText(helloWorld);
+ int n = 0;
+ while (bi->next() != UBRK_DONE) {
+ ++n;
+ }
+ TEST_ASSERT(n == 2);
- status = U_ZERO_ERROR;
- RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
+ // Extract the binary rules as a uint8_t blob.
+ uint32_t ruleLength;
+ const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
+ TEST_ASSERT(ruleLength > 0);
+ TEST_ASSERT(binRules != NULL);
- delete rb;
- delete rb2;
- delete rb3;
+ // Clone the binary rules, and create a break iterator from the clone.
+ // The break iterator does not adopt the rules; we must delete them ourselves once we are finished with the iterator.
+ uint8_t *clonedRules = new uint8_t[ruleLength];
+ memcpy(clonedRules, binRules, ruleLength);
+ RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
+ // Check that the break iterator built from the cloned rules is nominally functional.
+ clonedBI.setText(helloWorld);
+ n = 0;
+ while (clonedBI.next() != UBRK_DONE) {
+ ++n;
+ TEST_ASSERT(n == 2);
+ delete[] clonedRules;
static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
UErrorCode status = U_ZERO_ERROR;
LocalPointer<FilteredBreakIteratorBuilder> builder;
LocalPointer<BreakIterator> baseBI;
LocalPointer<BreakIterator> filteredBI;
+ LocalPointer<BreakIterator> frenchBI;
const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
const UnicodeString ABBR_MR("Mr.");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
- logln("Building new BI\n");
+ logln("Building new BI\n");
filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
- logln("Testing:");
- filteredBI->setText(text);
- TEST_ASSERT(20 == filteredBI->next()); // Mr.
- TEST_ASSERT(84 == filteredBI->next()); // recovered.
- TEST_ASSERT(90 == filteredBI->next()); // Capt.
- TEST_ASSERT(181 == filteredBI->next()); // Mr.
- TEST_ASSERT(278 == filteredBI->next()); // charge.
- filteredBI->first();
- prtbrks(filteredBI.getAlias(), text, *this);
+ if (U_SUCCESS(status)) {
+ logln("Testing:");
+ filteredBI->setText(text);
+ TEST_ASSERT(20 == filteredBI->next()); // Mr.
+ TEST_ASSERT(84 == filteredBI->next()); // recovered.
+ TEST_ASSERT(90 == filteredBI->next()); // Capt.
+ TEST_ASSERT(181 == filteredBI->next()); // Mr.
+ TEST_ASSERT(278 == filteredBI->next()); // charge.
+ filteredBI->first();
+ prtbrks(filteredBI.getAlias(), text, *this);
+ }
- logln("Adding Mr. as an exception\n");
- TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
- TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
- TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
- TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
- TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
+ if (U_SUCCESS(status)) {
+ logln("Adding Mr. as an exception\n");
+ TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
+ TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
+ TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
+ TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
+ TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
- logln("Constructing base BI\n");
- baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+ logln("Constructing base BI\n");
+ baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
- logln("Building new BI\n");
- filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+ logln("Building new BI\n");
+ filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
- logln("Testing:");
- filteredBI->setText(text);
- TEST_ASSERT(84 == filteredBI->next());
- TEST_ASSERT(90 == filteredBI->next());// Capt.
- TEST_ASSERT(278 == filteredBI->next());
- filteredBI->first();
- prtbrks(filteredBI.getAlias(), text, *this);
+ logln("Testing:");
+ filteredBI->setText(text);
+ TEST_ASSERT(84 == filteredBI->next());
+ TEST_ASSERT(90 == filteredBI->next());// Capt.
+ TEST_ASSERT(278 == filteredBI->next());
+ filteredBI->first();
+ prtbrks(filteredBI.getAlias(), text, *this);
+ }
- logln("Adding Mr. and Capt as an exception\n");
- TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
- TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
+ if (U_SUCCESS(status)) {
+ logln("Adding Mr. and Capt as an exception\n");
+ TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
+ TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
- logln("Constructing base BI\n");
- baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
+ logln("Constructing base BI\n");
+ baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
- logln("Building new BI\n");
- filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+ logln("Building new BI\n");
+ filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
- logln("Testing:");
- filteredBI->setText(text);
- TEST_ASSERT(84 == filteredBI->next());
- TEST_ASSERT(278 == filteredBI->next());
- filteredBI->first();
- prtbrks(filteredBI.getAlias(), text, *this);
+ logln("Testing:");
+ filteredBI->setText(text);
+ TEST_ASSERT(84 == filteredBI->next());
+ TEST_ASSERT(278 == filteredBI->next());
+ filteredBI->first();
+ prtbrks(filteredBI.getAlias(), text, *this);
+ }
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
- logln("unsuppressing 'Capt'");
- TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
+ if (U_SUCCESS(status)) {
+ logln("unsuppressing 'Capt'");
+ TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
- logln("Building new BI\n");
- filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+ logln("Building new BI\n");
+ filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
- if(filteredBI.isValid()) {
- logln("Testing:");
- filteredBI->setText(text);
- TEST_ASSERT(84 == filteredBI->next());
- TEST_ASSERT(90 == filteredBI->next());
- TEST_ASSERT(278 == filteredBI->next());
- filteredBI->first();
- prtbrks(filteredBI.getAlias(), text, *this);
+ if(filteredBI.isValid()) {
+ logln("Testing:");
+ filteredBI->setText(text);
+ TEST_ASSERT(84 == filteredBI->next());
+ TEST_ASSERT(90 == filteredBI->next());
+ TEST_ASSERT(278 == filteredBI->next());
+ filteredBI->first();
+ prtbrks(filteredBI.getAlias(), text, *this);
+ }
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
- logln("Building new BI\n");
- filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+ if (U_SUCCESS(status)) {
+ logln("Building new BI\n");
+ filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
- if(filteredBI.isValid()) {
- logln("Testing:");
- filteredBI->setText(text);
- TEST_ASSERT(84 == filteredBI->next());
- TEST_ASSERT(278 == filteredBI->next());
- filteredBI->first();
- prtbrks(filteredBI.getAlias(), text, *this);
+ if(filteredBI.isValid()) {
+ logln("Testing:");
+ filteredBI->setText(text);
+ TEST_ASSERT(84 == filteredBI->next());
+ TEST_ASSERT(278 == filteredBI->next());
+ filteredBI->first();
+ prtbrks(filteredBI.getAlias(), text, *this);
+ }
-#if 0
// reenable once french is in
logln("Constructing French builder");
baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
- logln("Building new BI\n");
- filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
+ if (U_SUCCESS(status)) {
+ logln("Building new BI\n");
+ frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
+ }
- if(filteredBI.isValid()) {
+ if(frenchBI.isValid()) {
- filteredBI->setText(text);
- TEST_ASSERT(20 == filteredBI->next());
- TEST_ASSERT(84 == filteredBI->next());
+ UnicodeString frText("C'est MM. Duval.");
+ frenchBI->setText(frText);
+ TEST_ASSERT(16 == frenchBI->next());
+ TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
+ frenchBI->first();
+ prtbrks(frenchBI.getAlias(), frText, *this);
+ logln("Testing against English:");
+ filteredBI->setText(frText);
+ TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
+ TEST_ASSERT(16 == filteredBI->next());
+ TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
- prtbrks(filteredBI.getAlias(), text, *this);
- }
+ prtbrks(filteredBI.getAlias(), frText, *this);
+ // Verify operator== and operator!=
+ TEST_ASSERT_TRUE(*frenchBI == *frenchBI);
+ TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
+ TEST_ASSERT_TRUE(*frenchBI != *filteredBI);
+ } else {
+ dataerrln("French BI: not valid.");
+ }
void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
- switch (index) {
- // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
- case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
- case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
- case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
- case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
- case 4: name = "TestIteration"; if (exec) TestIteration(); break;
- case 0: case 1: case 2: case 3: case 4: name = "skip"; break;
+ TESTCASE_AUTO(TestCloneEquals);
+ TESTCASE_AUTO(TestgetRules);
+ TESTCASE_AUTO(TestHashCode);
+ TESTCASE_AUTO(TestGetSetAdoptText);
+ TESTCASE_AUTO(TestIteration);
- case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
- case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
- case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
- case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;
+ TESTCASE_AUTO(TestBuilder);
+ TESTCASE_AUTO(TestQuoteGrouping);
+ TESTCASE_AUTO(TestRuleStatusVec);
+ TESTCASE_AUTO(TestBug2190);
- case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;
- case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
- case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
- case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
- case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
- case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
+ TESTCASE_AUTO(TestRegistration);
+ TESTCASE_AUTO(TestBoilerPlate);
+ TESTCASE_AUTO(TestRuleStatus);
+ TESTCASE_AUTO(TestRoundtripRules);
+ TESTCASE_AUTO(TestGetBinaryRules);
- case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
- case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
- case 15: name="skip"; break;
+ TESTCASE_AUTO(TestRefreshInputText);
+ TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
- default: name = ""; break; // needed to end loop
- }
//Internal subroutines
logln(prettify("****selected \"" + selected + "\""));
-//RBBIWithProtectedFunctions class functions
-RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
- : RuleBasedBreakIterator(data, status)
-RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
- : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)