2 ******************************************************************************* 
   3 * Copyright (C) 1997-2015, International Business Machines Corporation and 
   4 * others. All Rights Reserved. 
   5 ******************************************************************************* 
   9 * Modification History: 
  11 *   Date        Name        Description 
  12 *   02/18/97    aliu        Converted from OpenClass.  Added DONE. 
  13 *   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods. 
  14 ***************************************************************************************** 
  17 // ***************************************************************************** 
  18 // This file was generated from the java source file BreakIterator.java 
  19 // ***************************************************************************** 
  21 #include "unicode/utypes.h" 
  23 #if !UCONFIG_NO_BREAK_ITERATION 
  25 #include "unicode/rbbi.h" 
  26 #include "unicode/brkiter.h" 
  27 #include "unicode/udata.h" 
  28 #include "unicode/ures.h" 
  29 #include "unicode/ustring.h" 
  39 #include "unicode/filteredbrk.h" 
  41 // ***************************************************************************** 
  42 // class BreakIterator 
  43 // This class implements methods for finding the location of boundaries in text. 
  44 // Instances of BreakIterator maintain a current position and scan over text 
  45 // returning the index of characters where boundaries occur. 
  46 // ***************************************************************************** 
  50 // ------------------------------------- 
  53 BreakIterator::buildInstance(const Locale
& loc
, const char *type
, int32_t kind
, UErrorCode 
&status
) 
  57     CharString actualLocale
; 
  59     const UChar
* brkfname 
= NULL
; 
  60     UResourceBundle brkRulesStack
; 
  61     UResourceBundle brkNameStack
; 
  62     UResourceBundle 
*brkRules 
= &brkRulesStack
; 
  63     UResourceBundle 
*brkName  
= &brkNameStack
; 
  64     RuleBasedBreakIterator 
*result 
= NULL
; 
  66     if (U_FAILURE(status
)) 
  69     ures_initStackObject(brkRules
); 
  70     ures_initStackObject(brkName
); 
  73     UResourceBundle 
*b 
= ures_openNoDefault(U_ICUDATA_BRKITR
, loc
.getName(), &status
); 
  75     // Get the "boundaries" array. 
  76     if (U_SUCCESS(status
)) { 
  77         brkRules 
= ures_getByKeyWithFallback(b
, "boundaries", brkRules
, &status
); 
  78         // Get the string object naming the rules file 
  79         brkName 
= ures_getByKeyWithFallback(brkRules
, type
, brkName
, &status
); 
  80         // Get the actual string 
  81         brkfname 
= ures_getString(brkName
, &size
, &status
); 
  82         U_ASSERT((size_t)size
<sizeof(fnbuff
)); 
  83         if ((size_t)size
>=sizeof(fnbuff
)) { 
  85             if (U_SUCCESS(status
)) { 
  86                 status 
= U_BUFFER_OVERFLOW_ERROR
; 
  90         // Use the string if we found it 
  91         if (U_SUCCESS(status
) && brkfname
) { 
  92             actualLocale
.append(ures_getLocaleInternal(brkName
, &status
), -1, status
); 
  94             UChar
* extStart
=u_strchr(brkfname
, 0x002e); 
  97                 len 
= (int)(extStart
-brkfname
); 
  98                 u_UCharsToChars(extStart
+1, ext
, sizeof(ext
)); // nul terminates the buff 
  99                 u_UCharsToChars(brkfname
, fnbuff
, len
); 
 101             fnbuff
[len
]=0; // nul terminate 
 105     ures_close(brkRules
); 
 108     UDataMemory
* file 
= udata_open(U_ICUDATA_BRKITR
, ext
, fnbuff
, &status
); 
 109     if (U_FAILURE(status
)) { 
 114     // Create a RuleBasedBreakIterator 
 115     result 
= new RuleBasedBreakIterator(file
, status
); 
 117     // If there is a result, set the valid locale and actual locale, and the kind 
 118     if (U_SUCCESS(status
) && result 
!= NULL
) { 
 119         U_LOCALE_BASED(locBased
, *(BreakIterator
*)result
); 
 120         locBased
.setLocaleIDs(ures_getLocaleByType(b
, ULOC_VALID_LOCALE
, &status
),  
 121                               actualLocale
.data()); 
 122         result
->setBreakType(kind
); 
 127     if (U_FAILURE(status
) && result 
!= NULL
) {  // Sometimes redundant check, but simple 
 132     if (result 
== NULL
) { 
 134         if (U_SUCCESS(status
)) { 
 135             status 
= U_MEMORY_ALLOCATION_ERROR
; 
 142 // Creates a break iterator for word breaks. 
 143 BreakIterator
* U_EXPORT2
 
 144 BreakIterator::createWordInstance(const Locale
& key
, UErrorCode
& status
) 
 146     return createInstance(key
, UBRK_WORD
, status
); 
 149 // ------------------------------------- 
 151 // Creates a break iterator  for line breaks. 
 152 BreakIterator
* U_EXPORT2
 
 153 BreakIterator::createLineInstance(const Locale
& key
, UErrorCode
& status
) 
 155     return createInstance(key
, UBRK_LINE
, status
); 
 158 // ------------------------------------- 
 160 // Creates a break iterator  for character breaks. 
 161 BreakIterator
* U_EXPORT2
 
 162 BreakIterator::createCharacterInstance(const Locale
& key
, UErrorCode
& status
) 
 164     return createInstance(key
, UBRK_CHARACTER
, status
); 
 167 // ------------------------------------- 
 169 // Creates a break iterator  for sentence breaks. 
 170 BreakIterator
* U_EXPORT2
 
 171 BreakIterator::createSentenceInstance(const Locale
& key
, UErrorCode
& status
) 
 173     return createInstance(key
, UBRK_SENTENCE
, status
); 
 176 // ------------------------------------- 
 178 // Creates a break iterator for title casing breaks. 
 179 BreakIterator
* U_EXPORT2
 
 180 BreakIterator::createTitleInstance(const Locale
& key
, UErrorCode
& status
) 
 182     return createInstance(key
, UBRK_TITLE
, status
); 
 185 // ------------------------------------- 
 187 // Gets all the available locales that has localized text boundary data. 
 188 const Locale
* U_EXPORT2
 
 189 BreakIterator::getAvailableLocales(int32_t& count
) 
 191     return Locale::getAvailableLocales(count
); 
 194 // ------------------------------------------ 
 196 // Default constructor and destructor 
 198 //------------------------------------------- 
 200 BreakIterator::BreakIterator() 
 202     *validLocale 
= *actualLocale 
= 0; 
 205 BreakIterator::~BreakIterator() 
 209 // ------------------------------------------ 
 213 //------------------------------------------- 
 214 #if !UCONFIG_NO_SERVICE 
 216 // ------------------------------------- 
 218 class ICUBreakIteratorFactory 
: public ICUResourceBundleFactory 
{ 
 220     virtual ~ICUBreakIteratorFactory(); 
 222     virtual UObject
* handleCreate(const Locale
& loc
, int32_t kind
, const ICUService
* /*service*/, UErrorCode
& status
) const { 
 223         return BreakIterator::makeInstance(loc
, kind
, status
); 
 227 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 
 229 // ------------------------------------- 
 231 class ICUBreakIteratorService 
: public ICULocaleService 
{ 
 233     ICUBreakIteratorService() 
 234         : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 
 236         UErrorCode status 
= U_ZERO_ERROR
; 
 237         registerFactory(new ICUBreakIteratorFactory(), status
); 
 240     virtual ~ICUBreakIteratorService(); 
 242     virtual UObject
* cloneInstance(UObject
* instance
) const { 
 243         return ((BreakIterator
*)instance
)->clone(); 
 246     virtual UObject
* handleDefault(const ICUServiceKey
& key
, UnicodeString
* /*actualID*/, UErrorCode
& status
) const { 
 247         LocaleKey
& lkey 
= (LocaleKey
&)key
; 
 248         int32_t kind 
= lkey
.kind(); 
 250         lkey
.currentLocale(loc
); 
 251         return BreakIterator::makeInstance(loc
, kind
, status
); 
 254     virtual UBool 
isDefault() const { 
 255         return countFactories() == 1; 
 259 ICUBreakIteratorService::~ICUBreakIteratorService() {} 
 261 // ------------------------------------- 
 263 // defined in ucln_cmn.h 
 266 static icu::UInitOnce gInitOnce
; 
 267 static icu::ICULocaleService
* gService 
= NULL
; 
 272  * Release all static memory held by breakiterator. 
 275 static UBool U_CALLCONV 
breakiterator_cleanup(void) { 
 276 #if !UCONFIG_NO_SERVICE 
 288 static void U_CALLCONV 
 
 290     gService 
= new ICUBreakIteratorService(); 
 291     ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR
, breakiterator_cleanup
); 
 294 static ICULocaleService
* 
 297     umtx_initOnce(gInitOnce
, &initService
); 
 302 // ------------------------------------- 
 307     return !gInitOnce
.isReset() && getService() != NULL
; 
 310 // ------------------------------------- 
 312 URegistryKey U_EXPORT2
 
 313 BreakIterator::registerInstance(BreakIterator
* toAdopt
, const Locale
& locale
, UBreakIteratorType kind
, UErrorCode
& status
) 
 315     ICULocaleService 
*service 
= getService(); 
 316     if (service 
== NULL
) { 
 317         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 320     return service
->registerInstance(toAdopt
, locale
, kind
, status
); 
 323 // ------------------------------------- 
 326 BreakIterator::unregister(URegistryKey key
, UErrorCode
& status
) 
 328     if (U_SUCCESS(status
)) { 
 330             return gService
->unregister(key
, status
); 
 332         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 337 // ------------------------------------- 
 339 StringEnumeration
* U_EXPORT2
 
 340 BreakIterator::getAvailableLocales(void) 
 342     ICULocaleService 
*service 
= getService(); 
 343     if (service 
== NULL
) { 
 346     return service
->getAvailableLocales(); 
 348 #endif /* UCONFIG_NO_SERVICE */ 
 350 // ------------------------------------- 
 353 BreakIterator::createInstance(const Locale
& loc
, int32_t kind
, UErrorCode
& status
) 
 355     if (U_FAILURE(status
)) { 
 359 #if !UCONFIG_NO_SERVICE 
 361         Locale 
actualLoc(""); 
 362         BreakIterator 
*result 
= (BreakIterator
*)gService
->get(loc
, kind
, &actualLoc
, status
); 
 363         // TODO: The way the service code works in ICU 2.8 is that if 
 364         // there is a real registered break iterator, the actualLoc 
 365         // will be populated, but if the handleDefault path is taken 
 366         // (because nothing is registered that can handle the 
 367         // requested locale) then the actualLoc comes back empty.  In 
 368         // that case, the returned object already has its actual/valid 
 369         // locale data populated (by makeInstance, which is what 
 370         // handleDefault calls), so we don't touch it.  YES, A COMMENT 
 371         // THIS LONG is a sign of bad code -- so the action item is to 
 372         // revisit this in ICU 3.0 and clean it up/fix it/remove it. 
 373         if (U_SUCCESS(status
) && (result 
!= NULL
) && *actualLoc
.getName() != 0) { 
 374             U_LOCALE_BASED(locBased
, *result
); 
 375             locBased
.setLocaleIDs(actualLoc
.getName(), actualLoc
.getName()); 
 382         return makeInstance(loc
, kind
, status
); 
 386 // ------------------------------------- 
 387 enum { kKeyValueLenMax 
= 32 }; 
 390 BreakIterator::makeInstance(const Locale
& loc
, int32_t kind
, UErrorCode
& status
) 
 393     if (U_FAILURE(status
)) { 
 396     char lbType
[kKeyValueLenMax
]; 
 398     BreakIterator 
*result 
= NULL
; 
 401         result 
= BreakIterator::buildInstance(loc
, "grapheme", kind
, status
); 
 404         result 
= BreakIterator::buildInstance(loc
, "word", kind
, status
); 
 407         uprv_strcpy(lbType
, "line"); 
 409             char lbKeyValue
[kKeyValueLenMax
] = {0}; 
 410             UErrorCode kvStatus 
= U_ZERO_ERROR
; 
 411             int32_t kLen 
= loc
.getKeywordValue("lb", lbKeyValue
, kKeyValueLenMax
, kvStatus
); 
 412             if (U_SUCCESS(kvStatus
) && kLen 
> 0 && (uprv_strcmp(lbKeyValue
,"strict")==0 || uprv_strcmp(lbKeyValue
,"normal")==0 || uprv_strcmp(lbKeyValue
,"loose")==0)) { 
 413                 uprv_strcat(lbType
, "_"); 
 414                 uprv_strcat(lbType
, lbKeyValue
); 
 417         result 
= BreakIterator::buildInstance(loc
, lbType
, kind
, status
); 
 420         result 
= BreakIterator::buildInstance(loc
, "sentence", kind
, status
); 
 422             char ssKeyValue
[kKeyValueLenMax
] = {0}; 
 423             UErrorCode kvStatus 
= U_ZERO_ERROR
; 
 424             int32_t kLen 
= loc
.getKeywordValue("ss", ssKeyValue
, kKeyValueLenMax
, kvStatus
); 
 425             if (U_SUCCESS(kvStatus
) && kLen 
> 0 && uprv_strcmp(ssKeyValue
,"standard")==0) { 
 426                 FilteredBreakIteratorBuilder
* fbiBuilder 
= FilteredBreakIteratorBuilder::createInstance(loc
, kvStatus
); 
 427                 if (U_SUCCESS(kvStatus
)) { 
 428                     result 
= fbiBuilder
->build(result
, status
); 
 435         result 
= BreakIterator::buildInstance(loc
, "title", kind
, status
); 
 438         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 441     if (U_FAILURE(status
)) { 
 449 BreakIterator::getLocale(ULocDataLocaleType type
, UErrorCode
& status
) const { 
 450     U_LOCALE_BASED(locBased
, *this); 
 451     return locBased
.getLocale(type
, status
); 
 455 BreakIterator::getLocaleID(ULocDataLocaleType type
, UErrorCode
& status
) const { 
 456     U_LOCALE_BASED(locBased
, *this); 
 457     return locBased
.getLocaleID(type
, status
); 
 461 // This implementation of getRuleStatus is a do-nothing stub, here to 
 462 // provide a default implementation for any derived BreakIterator classes that 
 463 // do not implement it themselves. 
 464 int32_t BreakIterator::getRuleStatus() const { 
 468 // This implementation of getRuleStatusVec is a do-nothing stub, here to 
 469 // provide a default implementation for any derived BreakIterator classes that 
 470 // do not implement it themselves. 
 471 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec
, int32_t capacity
, UErrorCode 
&status
) { 
 472     if (U_FAILURE(status
)) { 
 476         status 
= U_BUFFER_OVERFLOW_ERROR
; 
 483 BreakIterator::BreakIterator (const Locale
& valid
, const Locale
& actual
) { 
 484   U_LOCALE_BASED(locBased
, (*this)); 
 485   locBased
.setLocaleIDs(valid
, actual
); 
 490 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */