1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 ******************************************************************************* 
   5 * Copyright (C) 1997-2015, International Business Machines Corporation and 
   6 * others. All Rights Reserved. 
   7 ******************************************************************************* 
  11 * Modification History: 
  13 *   Date        Name        Description 
  14 *   02/18/97    aliu        Converted from OpenClass.  Added DONE. 
  15 *   01/13/2000  helena      Added UErrorCode parameter to createXXXInstance methods. 
  16 ***************************************************************************************** 
  19 // ***************************************************************************** 
  20 // This file was generated from the java source file BreakIterator.java 
  21 // ***************************************************************************** 
  23 #include "unicode/utypes.h" 
  25 #if !UCONFIG_NO_BREAK_ITERATION 
  27 #include "unicode/rbbi.h" 
  28 #include "unicode/brkiter.h" 
  29 #include "unicode/udata.h" 
  30 #include "unicode/ures.h" 
  31 #include "unicode/ustring.h" 
  32 #include "unicode/filteredbrk.h" 
  42 #include "unicode/filteredbrk.h" 
  44 // ***************************************************************************** 
  45 // class BreakIterator 
  46 // This class implements methods for finding the location of boundaries in text. 
  47 // Instances of BreakIterator maintain a current position and scan over text 
  48 // returning the index of characters where boundaries occur. 
  49 // ***************************************************************************** 
  53 // ------------------------------------- 
  56 BreakIterator::buildInstance(const Locale
& loc
, const char *type
, int32_t kind
, UErrorCode 
&status
) 
  60     CharString actualLocale
; 
  62     const UChar
* brkfname 
= NULL
; 
  63     UResourceBundle brkRulesStack
; 
  64     UResourceBundle brkNameStack
; 
  65     UResourceBundle 
*brkRules 
= &brkRulesStack
; 
  66     UResourceBundle 
*brkName  
= &brkNameStack
; 
  67     RuleBasedBreakIterator 
*result 
= NULL
; 
  69     if (U_FAILURE(status
)) 
  72     ures_initStackObject(brkRules
); 
  73     ures_initStackObject(brkName
); 
  76     UResourceBundle 
*b 
= ures_openNoDefault(U_ICUDATA_BRKITR
, loc
.getName(), &status
); 
  78     // Get the "boundaries" array. 
  79     if (U_SUCCESS(status
)) { 
  80         brkRules 
= ures_getByKeyWithFallback(b
, "boundaries", brkRules
, &status
); 
  81         // Get the string object naming the rules file 
  82         brkName 
= ures_getByKeyWithFallback(brkRules
, type
, brkName
, &status
); 
  83         // Get the actual string 
  84         brkfname 
= ures_getString(brkName
, &size
, &status
); 
  85         U_ASSERT((size_t)size
<sizeof(fnbuff
)); 
  86         if ((size_t)size
>=sizeof(fnbuff
)) { 
  88             if (U_SUCCESS(status
)) { 
  89                 status 
= U_BUFFER_OVERFLOW_ERROR
; 
  93         // Use the string if we found it 
  94         if (U_SUCCESS(status
) && brkfname
) { 
  95             actualLocale
.append(ures_getLocaleInternal(brkName
, &status
), -1, status
); 
  97             UChar
* extStart
=u_strchr(brkfname
, 0x002e); 
 100                 len 
= (int)(extStart
-brkfname
); 
 101                 u_UCharsToChars(extStart
+1, ext
, sizeof(ext
)); // nul terminates the buff 
 102                 u_UCharsToChars(brkfname
, fnbuff
, len
); 
 104             fnbuff
[len
]=0; // nul terminate 
 108     ures_close(brkRules
); 
 111     UDataMemory
* file 
= udata_open(U_ICUDATA_BRKITR
, ext
, fnbuff
, &status
); 
 112     if (U_FAILURE(status
)) { 
 117     // Create a RuleBasedBreakIterator 
 118     result 
= new RuleBasedBreakIterator(file
, status
); 
 120     // If there is a result, set the valid locale and actual locale, and the kind 
 121     if (U_SUCCESS(status
) && result 
!= NULL
) { 
 122         U_LOCALE_BASED(locBased
, *(BreakIterator
*)result
); 
 123         locBased
.setLocaleIDs(ures_getLocaleByType(b
, ULOC_VALID_LOCALE
, &status
),  
 124                               actualLocale
.data()); 
 125         result
->setBreakType(kind
); 
 130     if (U_FAILURE(status
) && result 
!= NULL
) {  // Sometimes redundant check, but simple 
 135     if (result 
== NULL
) { 
 137         if (U_SUCCESS(status
)) { 
 138             status 
= U_MEMORY_ALLOCATION_ERROR
; 
 145 // Creates a break iterator for word breaks. 
 146 BreakIterator
* U_EXPORT2
 
 147 BreakIterator::createWordInstance(const Locale
& key
, UErrorCode
& status
) 
 149     return createInstance(key
, UBRK_WORD
, status
); 
 152 // ------------------------------------- 
 154 // Creates a break iterator  for line breaks. 
 155 BreakIterator
* U_EXPORT2
 
 156 BreakIterator::createLineInstance(const Locale
& key
, UErrorCode
& status
) 
 158     return createInstance(key
, UBRK_LINE
, status
); 
 161 // ------------------------------------- 
 163 // Creates a break iterator  for character breaks. 
 164 BreakIterator
* U_EXPORT2
 
 165 BreakIterator::createCharacterInstance(const Locale
& key
, UErrorCode
& status
) 
 167     return createInstance(key
, UBRK_CHARACTER
, status
); 
 170 // ------------------------------------- 
 172 // Creates a break iterator  for sentence breaks. 
 173 BreakIterator
* U_EXPORT2
 
 174 BreakIterator::createSentenceInstance(const Locale
& key
, UErrorCode
& status
) 
 176     return createInstance(key
, UBRK_SENTENCE
, status
); 
 179 // ------------------------------------- 
 181 // Creates a break iterator for title casing breaks. 
 182 BreakIterator
* U_EXPORT2
 
 183 BreakIterator::createTitleInstance(const Locale
& key
, UErrorCode
& status
) 
 185     return createInstance(key
, UBRK_TITLE
, status
); 
 188 // ------------------------------------- 
 190 // Gets all the available locales that has localized text boundary data. 
 191 const Locale
* U_EXPORT2
 
 192 BreakIterator::getAvailableLocales(int32_t& count
) 
 194     return Locale::getAvailableLocales(count
); 
 197 // ------------------------------------------ 
 199 // Default constructor and destructor 
 201 //------------------------------------------- 
 203 BreakIterator::BreakIterator() 
 206     *validLocale 
= *actualLocale 
= 0; 
 209 BreakIterator::~BreakIterator() 
 213 // ------------------------------------------ 
 217 //------------------------------------------- 
 218 #if !UCONFIG_NO_SERVICE 
 220 // ------------------------------------- 
 222 class ICUBreakIteratorFactory 
: public ICUResourceBundleFactory 
{ 
 224     virtual ~ICUBreakIteratorFactory(); 
 226     virtual UObject
* handleCreate(const Locale
& loc
, int32_t kind
, const ICUService
* /*service*/, UErrorCode
& status
) const { 
 227         return BreakIterator::makeInstance(loc
, kind
, status
); 
 231 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} 
 233 // ------------------------------------- 
 235 class ICUBreakIteratorService 
: public ICULocaleService 
{ 
 237     ICUBreakIteratorService() 
 238         : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) 
 240         UErrorCode status 
= U_ZERO_ERROR
; 
 241         registerFactory(new ICUBreakIteratorFactory(), status
); 
 244     virtual ~ICUBreakIteratorService(); 
 246     virtual UObject
* cloneInstance(UObject
* instance
) const { 
 247         return ((BreakIterator
*)instance
)->clone(); 
 250     virtual UObject
* handleDefault(const ICUServiceKey
& key
, UnicodeString
* /*actualID*/, UErrorCode
& status
) const { 
 251         LocaleKey
& lkey 
= (LocaleKey
&)key
; 
 252         int32_t kind 
= lkey
.kind(); 
 254         lkey
.currentLocale(loc
); 
 255         return BreakIterator::makeInstance(loc
, kind
, status
); 
 258     virtual UBool 
isDefault() const { 
 259         return countFactories() == 1; 
 263 ICUBreakIteratorService::~ICUBreakIteratorService() {} 
 265 // ------------------------------------- 
 267 // defined in ucln_cmn.h 
 270 static icu::UInitOnce gInitOnce
; 
 271 static icu::ICULocaleService
* gService 
= NULL
; 
 276  * Release all static memory held by breakiterator. 
 279 static UBool U_CALLCONV 
breakiterator_cleanup(void) { 
 280 #if !UCONFIG_NO_SERVICE 
 292 static void U_CALLCONV 
 
 294     gService 
= new ICUBreakIteratorService(); 
 295     ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR
, breakiterator_cleanup
); 
 298 static ICULocaleService
* 
 301     umtx_initOnce(gInitOnce
, &initService
); 
 306 // ------------------------------------- 
 311     return !gInitOnce
.isReset() && getService() != NULL
; 
 314 // ------------------------------------- 
 316 URegistryKey U_EXPORT2
 
 317 BreakIterator::registerInstance(BreakIterator
* toAdopt
, const Locale
& locale
, UBreakIteratorType kind
, UErrorCode
& status
) 
 319     ICULocaleService 
*service 
= getService(); 
 320     if (service 
== NULL
) { 
 321         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 324     return service
->registerInstance(toAdopt
, locale
, kind
, status
); 
 327 // ------------------------------------- 
 330 BreakIterator::unregister(URegistryKey key
, UErrorCode
& status
) 
 332     if (U_SUCCESS(status
)) { 
 334             return gService
->unregister(key
, status
); 
 336         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 341 // ------------------------------------- 
 343 StringEnumeration
* U_EXPORT2
 
 344 BreakIterator::getAvailableLocales(void) 
 346     ICULocaleService 
*service 
= getService(); 
 347     if (service 
== NULL
) { 
 350     return service
->getAvailableLocales(); 
 352 #endif /* UCONFIG_NO_SERVICE */ 
 354 // ------------------------------------- 
 357 BreakIterator::createInstance(const Locale
& loc
, int32_t kind
, UErrorCode
& status
) 
 359     if (U_FAILURE(status
)) { 
 363 #if !UCONFIG_NO_SERVICE 
 365         Locale 
actualLoc(""); 
 366         BreakIterator 
*result 
= (BreakIterator
*)gService
->get(loc
, kind
, &actualLoc
, status
); 
 367         // TODO: The way the service code works in ICU 2.8 is that if 
 368         // there is a real registered break iterator, the actualLoc 
 369         // will be populated, but if the handleDefault path is taken 
 370         // (because nothing is registered that can handle the 
 371         // requested locale) then the actualLoc comes back empty.  In 
 372         // that case, the returned object already has its actual/valid 
 373         // locale data populated (by makeInstance, which is what 
 374         // handleDefault calls), so we don't touch it.  YES, A COMMENT 
 375         // THIS LONG is a sign of bad code -- so the action item is to 
 376         // revisit this in ICU 3.0 and clean it up/fix it/remove it. 
 377         if (U_SUCCESS(status
) && (result 
!= NULL
) && *actualLoc
.getName() != 0) { 
 378             U_LOCALE_BASED(locBased
, *result
); 
 379             locBased
.setLocaleIDs(actualLoc
.getName(), actualLoc
.getName()); 
 386         return makeInstance(loc
, kind
, status
); 
 390 // ------------------------------------- 
 391 enum { kKeyValueLenMax 
= 32 }; 
 394 BreakIterator::makeInstance(const Locale
& loc
, int32_t kind
, UErrorCode
& status
) 
 397     if (U_FAILURE(status
)) { 
 400     char lbType
[kKeyValueLenMax
]; 
 402     BreakIterator 
*result 
= NULL
; 
 405         result 
= BreakIterator::buildInstance(loc
, "grapheme", kind
, status
); 
 408         result 
= BreakIterator::buildInstance(loc
, "word", kind
, status
); 
 411         uprv_strcpy(lbType
, "line"); 
 413             char lbKeyValue
[kKeyValueLenMax
] = {0}; 
 414             UErrorCode kvStatus 
= U_ZERO_ERROR
; 
 415             int32_t kLen 
= loc
.getKeywordValue("lb", lbKeyValue
, kKeyValueLenMax
, kvStatus
); 
 416             if (U_SUCCESS(kvStatus
) && kLen 
> 0 && (uprv_strcmp(lbKeyValue
,"strict")==0 || uprv_strcmp(lbKeyValue
,"normal")==0 || uprv_strcmp(lbKeyValue
,"loose")==0)) { 
 417                 uprv_strcat(lbType
, "_"); 
 418                 uprv_strcat(lbType
, lbKeyValue
); 
 421         result 
= BreakIterator::buildInstance(loc
, lbType
, kind
, status
); 
 422         if (U_SUCCESS(status
) && result 
!= NULL
) { 
 423             char lwKeyValue
[kKeyValueLenMax
] = {0}; 
 424             UErrorCode kvStatus 
= U_ZERO_ERROR
; 
 425             int32_t kLen 
= loc
.getKeywordValue("lw", lwKeyValue
, kKeyValueLenMax
, kvStatus
); 
 426             result
->setKeepAll(U_SUCCESS(kvStatus
) && kLen 
> 0 && uprv_strcmp(lwKeyValue
,"keepall")==0); 
 430         result 
= BreakIterator::buildInstance(loc
, "sentence", kind
, status
); 
 431 #if !UCONFIG_NO_FILTERED_BREAK_ITERATION 
 433             char ssKeyValue
[kKeyValueLenMax
] = {0}; 
 434             UErrorCode kvStatus 
= U_ZERO_ERROR
; 
 435             int32_t kLen 
= loc
.getKeywordValue("ss", ssKeyValue
, kKeyValueLenMax
, kvStatus
); 
 436             if (U_SUCCESS(kvStatus
) && kLen 
> 0 && uprv_strcmp(ssKeyValue
,"standard")==0) { 
 437                 FilteredBreakIteratorBuilder
* fbiBuilder 
= FilteredBreakIteratorBuilder::createInstance(loc
, kvStatus
); 
 438                 if (U_SUCCESS(kvStatus
)) { 
 439                     result 
= fbiBuilder
->build(result
, status
); 
 447         result 
= BreakIterator::buildInstance(loc
, "title", kind
, status
); 
 450         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 453     if (U_FAILURE(status
)) { 
 461 BreakIterator::getLocale(ULocDataLocaleType type
, UErrorCode
& status
) const { 
 462     U_LOCALE_BASED(locBased
, *this); 
 463     return locBased
.getLocale(type
, status
); 
 467 BreakIterator::getLocaleID(ULocDataLocaleType type
, UErrorCode
& status
) const { 
 468     U_LOCALE_BASED(locBased
, *this); 
 469     return locBased
.getLocaleID(type
, status
); 
 473 // This implementation of getRuleStatus is a do-nothing stub, here to 
 474 // provide a default implementation for any derived BreakIterator classes that 
 475 // do not implement it themselves. 
 476 int32_t BreakIterator::getRuleStatus() const { 
 480 // This implementation of getRuleStatusVec is a do-nothing stub, here to 
 481 // provide a default implementation for any derived BreakIterator classes that 
 482 // do not implement it themselves. 
 483 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec
, int32_t capacity
, UErrorCode 
&status
) { 
 484     if (U_FAILURE(status
)) { 
 488         status 
= U_BUFFER_OVERFLOW_ERROR
; 
 495 BreakIterator::BreakIterator (const Locale
& valid
, const Locale
& actual
) { 
 496   U_LOCALE_BASED(locBased
, (*this)); 
 497   locBased
.setLocaleIDs(valid
, actual
); 
 502 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */