2 *******************************************************************************
3 * Copyright (C) 1997-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 02/18/97 aliu Converted from OpenClass. Added DONE.
13 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
14 *****************************************************************************************
17 // *****************************************************************************
18 // This file was generated from the java source file BreakIterator.java
19 // *****************************************************************************
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_BREAK_ITERATION
25 #include "unicode/rbbi.h"
26 #include "unicode/brkiter.h"
27 #include "unicode/udata.h"
28 #include "unicode/ures.h"
29 #include "unicode/ustring.h"
39 #include "unicode/filteredbrk.h"
41 // *****************************************************************************
42 // class BreakIterator
43 // This class implements methods for finding the location of boundaries in text.
44 // Instances of BreakIterator maintain a current position and scan over text
45 // returning the index of characters where boundaries occur.
46 // *****************************************************************************
50 // -------------------------------------
// Builds a RuleBasedBreakIterator for loc from the break-iterator resource data.
// Visible steps: open the U_ICUDATA_BRKITR bundle for loc, look up the entry
// named by type inside the boundaries table, read the rules-file name from it,
// open that data item with udata_open, and construct the iterator from it.
//   loc    - locale whose name selects the resource bundle
//   type   - key looked up inside the boundaries table
//   kind   - value handed to setBreakType on the new iterator
//   status - in/out ICU error code
// NOTE(review): this listing omits several lines of the function, including
// the declarations of fnbuff, ext, size and len and every return statement.
53 BreakIterator::buildInstance(const Locale
& loc
, const char *type
, int32_t kind
, UErrorCode
&status
)
57 CharString actualLocale
;
59 const UChar
* brkfname
= NULL
;
60 UResourceBundle brkRulesStack
;
61 UResourceBundle brkNameStack
;
// brkRules and brkName start out pointing at the two stack objects above.
62 UResourceBundle
*brkRules
= &brkRulesStack
;
63 UResourceBundle
*brkName
= &brkNameStack
;
64 RuleBasedBreakIterator
*result
= NULL
;
// Test for an incoming failure code; the guarded statement is not visible here.
66 if (U_FAILURE(status
))
// Initialize both stack bundles before they are passed as fill-in arguments
// to ures_getByKeyWithFallback below.
69 ures_initStackObject(brkRules
);
70 ures_initStackObject(brkName
);
// Open the break-iterator bundle for the requested locale name.
73 UResourceBundle
*b
= ures_openNoDefault(U_ICUDATA_BRKITR
, loc
.getName(), &status
);
75 // Get the "boundaries" array.
76 if (U_SUCCESS(status
)) {
77 brkRules
= ures_getByKeyWithFallback(b
, "boundaries", brkRules
, &status
);
78 // Get the string object naming the rules file
79 brkName
= ures_getByKeyWithFallback(brkRules
, type
, brkName
, &status
);
80 // Get the actual string
81 brkfname
= ures_getString(brkName
, &size
, &status
);
// The name must be shorter than fnbuff: asserted first, then checked again at
// run time, recording U_BUFFER_OVERFLOW_ERROR if no earlier error is set.
82 U_ASSERT((size_t)size
<sizeof(fnbuff
));
83 if ((size_t)size
>=sizeof(fnbuff
)) {
85 if (U_SUCCESS(status
)) {
86 status
= U_BUFFER_OVERFLOW_ERROR
;
90 // Use the string if we found it
91 if (U_SUCCESS(status
) && brkfname
) {
// Remember the locale of the bundle that supplied the name; it is used as the
// actual locale of the result further down.
92 actualLocale
.append(ures_getLocaleInternal(brkName
, &status
), -1, status
);
// Split the name at U+002E (full stop): the part before it is copied into
// fnbuff and the part after it into ext.
// NOTE(review): any null check on extStart is not visible in this listing.
94 UChar
* extStart
=u_strchr(brkfname
, 0x002e);
97 len
= (int)(extStart
-brkfname
);
98 u_UCharsToChars(extStart
+1, ext
, sizeof(ext
)); // nul terminates the buff
99 u_UCharsToChars(brkfname
, fnbuff
, len
);
101 fnbuff
[len
]=0; // nul terminate
105 ures_close(brkRules
);
// Open the data item whose name is fnbuff and whose type is ext.
108 UDataMemory
* file
= udata_open(U_ICUDATA_BRKITR
, ext
, fnbuff
, &status
);
// The statements executed on failure are not visible in this listing.
109 if (U_FAILURE(status
)) {
114 // Create a RuleBasedBreakIterator
115 result
= new RuleBasedBreakIterator(file
, status
);
117 // If there is a result, set the valid locale and actual locale, and the kind
118 if (U_SUCCESS(status
) && result
!= NULL
) {
119 U_LOCALE_BASED(locBased
, *(BreakIterator
*)result
);
120 locBased
.setLocaleIDs(ures_getLocaleByType(b
, ULOC_VALID_LOCALE
, &status
),
121 actualLocale
.data());
122 result
->setBreakType(kind
);
// Failure after the iterator was allocated; the handling is not visible here.
127 if (U_FAILURE(status
) && result
!= NULL
) { // Sometimes redundant check, but simple
// A null result with no recorded error is reported as an allocation failure.
132 if (result
== NULL
) {
134 if (U_SUCCESS(status
)) {
135 status
= U_MEMORY_ALLOCATION_ERROR
;
142 // Creates a break iterator for word breaks.
// Forwards to createInstance with kind UBRK_WORD; key is the Locale handed on
// and status is passed through unchanged.
143 BreakIterator
* U_EXPORT2
144 BreakIterator::createWordInstance(const Locale
& key
, UErrorCode
& status
)
146 return createInstance(key
, UBRK_WORD
, status
);
149 // -------------------------------------
151 // Creates a break iterator for line breaks.
// Forwards to createInstance with kind UBRK_LINE; key is the Locale handed on
// and status is passed through unchanged.
152 BreakIterator
* U_EXPORT2
153 BreakIterator::createLineInstance(const Locale
& key
, UErrorCode
& status
)
155 return createInstance(key
, UBRK_LINE
, status
);
158 // -------------------------------------
160 // Creates a break iterator for character breaks.
// Forwards to createInstance with kind UBRK_CHARACTER; key is the Locale handed
// on and status is passed through unchanged.
161 BreakIterator
* U_EXPORT2
162 BreakIterator::createCharacterInstance(const Locale
& key
, UErrorCode
& status
)
164 return createInstance(key
, UBRK_CHARACTER
, status
);
167 // -------------------------------------
169 // Creates a break iterator for sentence breaks.
// Forwards to createInstance with kind UBRK_SENTENCE; key is the Locale handed
// on and status is passed through unchanged.
170 BreakIterator
* U_EXPORT2
171 BreakIterator::createSentenceInstance(const Locale
& key
, UErrorCode
& status
)
173 return createInstance(key
, UBRK_SENTENCE
, status
);
176 // -------------------------------------
178 // Creates a break iterator for title casing breaks.
// Forwards to createInstance with kind UBRK_TITLE; key is the Locale handed on
// and status is passed through unchanged.
179 BreakIterator
* U_EXPORT2
180 BreakIterator::createTitleInstance(const Locale
& key
, UErrorCode
& status
)
182 return createInstance(key
, UBRK_TITLE
, status
);
185 // -------------------------------------
187 // Gets all the available locales that have localized text boundary data.
// Implemented by delegating to Locale::getAvailableLocales, passing count
// through by reference.
188 const Locale
* U_EXPORT2
189 BreakIterator::getAvailableLocales(int32_t& count
)
191 return Locale::getAvailableLocales(count
);
194 // ------------------------------------------
196 // Default constructor and destructor
198 //-------------------------------------------
// Default constructor: stores 0 into the first element of both validLocale and
// actualLocale.
// NOTE(review): presumably both are char buffers, so this makes them empty
// C strings - confirm against the class declaration.
200 BreakIterator::BreakIterator()
202 *validLocale
= *actualLocale
= 0;
// Destructor. NOTE(review): its body is not visible in this listing.
205 BreakIterator::~BreakIterator()
209 // ------------------------------------------
213 //-------------------------------------------
214 #if !UCONFIG_NO_SERVICE
216 // -------------------------------------
// Service factory, derived from ICUResourceBundleFactory, whose handleCreate
// produces break iterators through BreakIterator::makeInstance.
218 class ICUBreakIteratorFactory
: public ICUResourceBundleFactory
{
220 virtual ~ICUBreakIteratorFactory();
// handleCreate: the service argument is unused (its name is commented out);
// loc, kind and status are forwarded to BreakIterator::makeInstance.
222 virtual UObject
* handleCreate(const Locale
& loc
, int32_t kind
, const ICUService
* /*service*/, UErrorCode
& status
) const {
223 return BreakIterator::makeInstance(loc
, kind
, status
);
// Out-of-line destructor definition with an empty body.
227 ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {}
229 // -------------------------------------
// Locale service for break iterators, derived from ICULocaleService and
// constructed with the name Break Iterator.
231 class ICUBreakIteratorService
: public ICULocaleService
{
// Constructor: registers one new ICUBreakIteratorFactory. The local status
// filled in by registerFactory is not inspected in the visible lines.
233 ICUBreakIteratorService()
234 : ICULocaleService(UNICODE_STRING("Break Iterator", 14))
236 UErrorCode status
= U_ZERO_ERROR
;
237 registerFactory(new ICUBreakIteratorFactory(), status
);
240 virtual ~ICUBreakIteratorService();
// cloneInstance: treats instance as a BreakIterator and returns its clone().
242 virtual UObject
* cloneInstance(UObject
* instance
) const {
243 return ((BreakIterator
*)instance
)->clone();
// handleDefault: casts key to LocaleKey, reads its kind and current locale,
// and builds the iterator with BreakIterator::makeInstance. The actualID
// argument is unused (its name is commented out).
// NOTE(review): the declaration of loc is not visible in this listing.
246 virtual UObject
* handleDefault(const ICUServiceKey
& key
, UnicodeString
* /*actualID*/, UErrorCode
& status
) const {
247 LocaleKey
& lkey
= (LocaleKey
&)key
;
248 int32_t kind
= lkey
.kind();
250 lkey
.currentLocale(loc
);
251 return BreakIterator::makeInstance(loc
, kind
, status
);
// isDefault: true when exactly one factory is registered.
// NOTE(review): presumably that one factory is the ICUBreakIteratorFactory
// added by the constructor - confirm.
254 virtual UBool
isDefault() const {
255 return countFactories() == 1;
// Out-of-line destructor definition with an empty body.
259 ICUBreakIteratorService::~ICUBreakIteratorService() {}
261 // -------------------------------------
263 // defined in ucln_cmn.h
// Init-once state passed to umtx_initOnce together with initService.
266 static icu::UInitOnce gInitOnce
;
// File-scope service pointer, initially NULL; the initializer in this file
// assigns it a new ICUBreakIteratorService.
267 static icu::ICULocaleService
* gService
= NULL
;
272 * Release all static memory held by breakiterator.
// Cleanup callback; it is the function registered with
// ucln_common_registerCleanup under UCLN_COMMON_BREAKITERATOR in this file.
// NOTE(review): the body inside the UCONFIG_NO_SERVICE guard is not visible
// in this listing.
275 static UBool U_CALLCONV
breakiterator_cleanup(void) {
276 #if !UCONFIG_NO_SERVICE
// Initializer for the break-iterator service: allocates a new
// ICUBreakIteratorService into gService and registers breakiterator_cleanup
// under UCLN_COMMON_BREAKITERATOR.
// NOTE(review): the line carrying the function name is not visible here; the
// name initService is taken from the umtx_initOnce call in this file.
288 static void U_CALLCONV
290 gService
= new ICUBreakIteratorService();
291 ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR
, breakiterator_cleanup
);
// Accessor returning an ICULocaleService pointer; it first runs initService
// through umtx_initOnce using gInitOnce.
// NOTE(review): the function-name line and the return statement are not
// visible; callers in this file invoke getService() and compare the result
// against NULL.
294 static ICULocaleService
*
297 umtx_initOnce(gInitOnce
, &initService
);
302 // -------------------------------------
// Yields true only when gInitOnce is not in its reset state and getService()
// returns a non-NULL pointer. Because && short-circuits, getService() is not
// called while gInitOnce is still reset.
// NOTE(review): the signature line of this function is not visible here.
307 return !gInitOnce
.isReset() && getService() != NULL
;
310 // -------------------------------------
// Registers toAdopt with the break-iterator service for the given locale and
// kind, returning the key produced by the service's registerInstance.
// If getService() returns NULL, status is set to U_MEMORY_ALLOCATION_ERROR.
// NOTE(review): the return statement on that NULL path is not visible here.
312 URegistryKey U_EXPORT2
313 BreakIterator::registerInstance(BreakIterator
* toAdopt
, const Locale
& locale
, UBreakIteratorType kind
, UErrorCode
& status
)
315 ICULocaleService
*service
= getService();
316 if (service
== NULL
) {
317 status
= U_MEMORY_ALLOCATION_ERROR
;
320 return service
->registerInstance(toAdopt
, locale
, kind
, status
);
323 // -------------------------------------
// Removes the registration identified by key by calling gService->unregister,
// which is reached only when status holds a success code on entry.
// NOTE(review): the line between the success test and the gService call is
// not visible; presumably it checks that the service exists, since
// U_MEMORY_ALLOCATION_ERROR is assigned afterwards - confirm.
326 BreakIterator::unregister(URegistryKey key
, UErrorCode
& status
)
328 if (U_SUCCESS(status
)) {
330 return gService
->unregister(key
, status
);
332 status
= U_MEMORY_ALLOCATION_ERROR
;
337 // -------------------------------------
// Returns the StringEnumeration produced by the service's
// getAvailableLocales().
// NOTE(review): the statement executed when getService() returns NULL is not
// visible in this listing.
339 StringEnumeration
* U_EXPORT2
340 BreakIterator::getAvailableLocales(void)
342 ICULocaleService
*service
= getService();
343 if (service
== NULL
) {
346 return service
->getAvailableLocales();
348 #endif /* UCONFIG_NO_SERVICE */
350 // -------------------------------------
// Creates a break iterator of the given kind for loc.
// With services compiled in (UCONFIG_NO_SERVICE false) the iterator is
// requested from gService; the final visible statement calls makeInstance
// directly.
// NOTE(review): the condition choosing between the service path and the
// direct path, and the statement guarded by the initial failure test, are
// not visible in this listing.
353 BreakIterator::createInstance(const Locale
& loc
, int32_t kind
, UErrorCode
& status
)
355 if (U_FAILURE(status
)) {
359 #if !UCONFIG_NO_SERVICE
// actualLoc starts out empty and is passed to the service as an out-parameter.
361 Locale
actualLoc("");
362 BreakIterator
*result
= (BreakIterator
*)gService
->get(loc
, kind
, &actualLoc
, status
);
363 // TODO: The way the service code works in ICU 2.8 is that if
364 // there is a real registered break iterator, the actualLoc
365 // will be populated, but if the handleDefault path is taken
366 // (because nothing is registered that can handle the
367 // requested locale) then the actualLoc comes back empty. In
368 // that case, the returned object already has its actual/valid
369 // locale data populated (by makeInstance, which is what
370 // handleDefault calls), so we don't touch it. YES, A COMMENT
371 // THIS LONG is a sign of bad code -- so the action item is to
372 // revisit this in ICU 3.0 and clean it up/fix it/remove it.
// Overwrite the locale IDs only when the service reported a non-empty actual
// locale; both IDs are then set to that same name.
373 if (U_SUCCESS(status
) && (result
!= NULL
) && *actualLoc
.getName() != 0) {
374 U_LOCALE_BASED(locBased
, *result
);
375 locBased
.setLocaleIDs(actualLoc
.getName(), actualLoc
.getName());
382 return makeInstance(loc
, kind
, status
);
386 // -------------------------------------
// Capacity, in chars, of the buffers lbType, lbKeyValue and ssKeyValue
// declared in makeInstance.
387 enum { kKeyValueLenMax
= 32 };
// Builds the break iterator for loc by calling buildInstance with one of the
// resource type names grapheme, word, line (optionally suffixed), sentence or
// title.
// NOTE(review): the switch and case labels that choose among these calls are
// not visible in this listing, so the mapping from kind to type name is
// presumed from the order of the calls - confirm against the full source.
390 BreakIterator::makeInstance(const Locale
& loc
, int32_t kind
, UErrorCode
& status
)
393 if (U_FAILURE(status
)) {
396 char lbType
[kKeyValueLenMax
];
398 BreakIterator
*result
= NULL
;
401 result
= BreakIterator::buildInstance(loc
, "grapheme", kind
, status
);
404 result
= BreakIterator::buildInstance(loc
, "word", kind
, status
);
// Line breaking: start from the type name line and, when the locale keyword lb
// is strict, normal or loose, append an underscore followed by that value.
// The keyword lookup reports into its own kvStatus, not into status.
407 uprv_strcpy(lbType
, "line");
409 char lbKeyValue
[kKeyValueLenMax
] = {0};
410 UErrorCode kvStatus
= U_ZERO_ERROR
;
411 int32_t kLen
= loc
.getKeywordValue("lb", lbKeyValue
, kKeyValueLenMax
, kvStatus
);
412 if (U_SUCCESS(kvStatus
) && kLen
> 0 && (uprv_strcmp(lbKeyValue
,"strict")==0 || uprv_strcmp(lbKeyValue
,"normal")==0 || uprv_strcmp(lbKeyValue
,"loose")==0)) {
413 uprv_strcat(lbType
, "_");
414 uprv_strcat(lbType
, lbKeyValue
);
417 result
= BreakIterator::buildInstance(loc
, lbType
, kind
, status
);
// Sentence breaking: build the plain sentence iterator first; when the locale
// keyword ss equals standard, pass it to a FilteredBreakIteratorBuilder.
// Builder creation reports into kvStatus, so a failure there leaves status
// and the unwrapped result unchanged.
420 result
= BreakIterator::buildInstance(loc
, "sentence", kind
, status
);
422 char ssKeyValue
[kKeyValueLenMax
] = {0};
423 UErrorCode kvStatus
= U_ZERO_ERROR
;
424 int32_t kLen
= loc
.getKeywordValue("ss", ssKeyValue
, kKeyValueLenMax
, kvStatus
);
425 if (U_SUCCESS(kvStatus
) && kLen
> 0 && uprv_strcmp(ssKeyValue
,"standard")==0) {
426 FilteredBreakIteratorBuilder
* fbiBuilder
= FilteredBreakIteratorBuilder::createInstance(loc
, kvStatus
);
427 if (U_SUCCESS(kvStatus
)) {
428 result
= fbiBuilder
->build(result
, status
);
435 result
= BreakIterator::buildInstance(loc
, "title", kind
, status
);
// NOTE(review): presumably the path for an unrecognized kind; the enclosing
// label is not visible in this listing.
438 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Final failure check; the guarded statement is not visible in this listing.
441 if (U_FAILURE(status
)) {
// Locale lookup for this iterator: U_LOCALE_BASED declares locBased over *this
// and the result of locBased.getLocale(type, status) is returned.
449 BreakIterator::getLocale(ULocDataLocaleType type
, UErrorCode
& status
) const {
450 U_LOCALE_BASED(locBased
, *this);
451 return locBased
.getLocale(type
, status
);
// Locale-ID lookup for this iterator: U_LOCALE_BASED declares locBased over
// *this and the result of locBased.getLocaleID(type, status) is returned.
455 BreakIterator::getLocaleID(ULocDataLocaleType type
, UErrorCode
& status
) const {
456 U_LOCALE_BASED(locBased
, *this);
457 return locBased
.getLocaleID(type
, status
);
461 // This implementation of getRuleStatus is a do-nothing stub, here to
462 // provide a default implementation for any derived BreakIterator classes that
463 // do not implement it themselves.
// NOTE(review): the return statement is not visible in this listing.
464 int32_t BreakIterator::getRuleStatus() const {
468 // This implementation of getRuleStatusVec is a do-nothing stub, here to
469 // provide a default implementation for any derived BreakIterator classes that
470 // do not implement it themselves.
// Visible behavior: tests for an incoming failure code and, on a path whose
// condition is not visible here, sets status to U_BUFFER_OVERFLOW_ERROR.
// NOTE(review): the use of fillInVec and capacity and the return statements
// are not visible in this listing.
471 int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec
, int32_t capacity
, UErrorCode
&status
) {
472 if (U_FAILURE(status
)) {
476 status
= U_BUFFER_OVERFLOW_ERROR
;
// Constructor taking explicit locales: U_LOCALE_BASED declares locBased over
// *this, and setLocaleIDs stores valid and actual through it.
483 BreakIterator::BreakIterator (const Locale
& valid
, const Locale
& actual
) {
484 U_LOCALE_BASED(locBased
, (*this));
485 locBased
.setLocaleIDs(valid
, actual
);
490 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */