]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ******************************************************************************* | |
f3c0d7a5 | 5 | * Copyright (C) 1997-2015, International Business Machines Corporation and |
729e4ab9 | 6 | * others. All Rights Reserved. |
b75a7d8f A |
7 | ******************************************************************************* |
8 | * | |
57a6839d | 9 | * File brkiter.cpp |
b75a7d8f A |
10 | * |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 02/18/97 aliu Converted from OpenClass. Added DONE. | |
15 | * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. | |
16 | ***************************************************************************************** | |
17 | */ | |
18 | ||
19 | // ***************************************************************************** | |
20 | // This file was generated from the java source file BreakIterator.java | |
21 | // ***************************************************************************** | |
22 | ||
23 | #include "unicode/utypes.h" | |
24 | ||
25 | #if !UCONFIG_NO_BREAK_ITERATION | |
26 | ||
73c04bcf | 27 | #include "unicode/rbbi.h" |
b75a7d8f A |
28 | #include "unicode/brkiter.h" |
29 | #include "unicode/udata.h" | |
374ca955 | 30 | #include "unicode/ures.h" |
73c04bcf | 31 | #include "unicode/ustring.h" |
2ca993e8 | 32 | #include "unicode/filteredbrk.h" |
374ca955 | 33 | #include "ucln_cmn.h" |
b75a7d8f | 34 | #include "cstring.h" |
46f4442e | 35 | #include "umutex.h" |
73c04bcf | 36 | #include "servloc.h" |
374ca955 A |
37 | #include "locbased.h" |
38 | #include "uresimp.h" | |
73c04bcf A |
39 | #include "uassert.h" |
40 | #include "ubrkimpl.h" | |
b331163b A |
41 | #include "charstr.h" |
42 | #include "unicode/filteredbrk.h" | |
b75a7d8f A |
43 | |
44 | // ***************************************************************************** | |
45 | // class BreakIterator | |
46 | // This class implements methods for finding the location of boundaries in text. | |
47 | // Instances of BreakIterator maintain a current position and scan over text | |
48 | // returning the index of characters where boundaries occur. | |
49 | // ***************************************************************************** | |
50 | ||
51 | U_NAMESPACE_BEGIN | |
52 | ||
b75a7d8f A |
53 | // ------------------------------------- |
54 | ||
b75a7d8f | 55 | BreakIterator* |
73c04bcf | 56 | BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) |
b75a7d8f | 57 | { |
374ca955 | 58 | char fnbuff[256]; |
73c04bcf | 59 | char ext[4]={'\0'}; |
b331163b | 60 | CharString actualLocale; |
374ca955 A |
61 | int32_t size; |
62 | const UChar* brkfname = NULL; | |
73c04bcf A |
63 | UResourceBundle brkRulesStack; |
64 | UResourceBundle brkNameStack; | |
65 | UResourceBundle *brkRules = &brkRulesStack; | |
66 | UResourceBundle *brkName = &brkNameStack; | |
67 | RuleBasedBreakIterator *result = NULL; | |
46f4442e | 68 | |
b75a7d8f A |
69 | if (U_FAILURE(status)) |
70 | return NULL; | |
71 | ||
374ca955 A |
72 | ures_initStackObject(brkRules); |
73 | ures_initStackObject(brkName); | |
74 | ||
75 | // Get the locale | |
b331163b | 76 | UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); |
374ca955 A |
77 | |
78 | // Get the "boundaries" array. | |
79 | if (U_SUCCESS(status)) { | |
80 | brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); | |
81 | // Get the string object naming the rules file | |
82 | brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); | |
83 | // Get the actual string | |
84 | brkfname = ures_getString(brkName, &size, &status); | |
73c04bcf A |
85 | U_ASSERT((size_t)size<sizeof(fnbuff)); |
86 | if ((size_t)size>=sizeof(fnbuff)) { | |
87 | size=0; | |
88 | if (U_SUCCESS(status)) { | |
89 | status = U_BUFFER_OVERFLOW_ERROR; | |
90 | } | |
91 | } | |
374ca955 A |
92 | |
93 | // Use the string if we found it | |
94 | if (U_SUCCESS(status) && brkfname) { | |
b331163b | 95 | actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); |
46f4442e | 96 | |
73c04bcf A |
97 | UChar* extStart=u_strchr(brkfname, 0x002e); |
98 | int len = 0; | |
99 | if(extStart!=NULL){ | |
729e4ab9 | 100 | len = (int)(extStart-brkfname); |
73c04bcf A |
101 | u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff |
102 | u_UCharsToChars(brkfname, fnbuff, len); | |
103 | } | |
104 | fnbuff[len]=0; // nul terminate | |
374ca955 | 105 | } |
b75a7d8f A |
106 | } |
107 | ||
374ca955 A |
108 | ures_close(brkRules); |
109 | ures_close(brkName); | |
46f4442e | 110 | |
73c04bcf | 111 | UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); |
b75a7d8f | 112 | if (U_FAILURE(status)) { |
374ca955 | 113 | ures_close(b); |
b75a7d8f A |
114 | return NULL; |
115 | } | |
b75a7d8f | 116 | |
73c04bcf A |
117 | // Create a RuleBasedBreakIterator |
118 | result = new RuleBasedBreakIterator(file, status); | |
374ca955 | 119 | |
73c04bcf | 120 | // If there is a result, set the valid locale and actual locale, and the kind |
374ca955 | 121 | if (U_SUCCESS(status) && result != NULL) { |
46f4442e | 122 | U_LOCALE_BASED(locBased, *(BreakIterator*)result); |
b331163b A |
123 | locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), |
124 | actualLocale.data()); | |
73c04bcf | 125 | result->setBreakType(kind); |
b75a7d8f | 126 | } |
374ca955 A |
127 | |
128 | ures_close(b); | |
46f4442e | 129 | |
374ca955 | 130 | if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple |
b75a7d8f | 131 | delete result; |
374ca955 A |
132 | return NULL; |
133 | } | |
134 | ||
135 | if (result == NULL) { | |
136 | udata_close(file); | |
137 | if (U_SUCCESS(status)) { | |
138 | status = U_MEMORY_ALLOCATION_ERROR; | |
139 | } | |
b75a7d8f A |
140 | } |
141 | ||
142 | return result; | |
143 | } | |
144 | ||
374ca955 A |
145 | // Creates a break iterator for word breaks. |
146 | BreakIterator* U_EXPORT2 | |
147 | BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) | |
148 | { | |
149 | return createInstance(key, UBRK_WORD, status); | |
150 | } | |
151 | ||
b75a7d8f A |
152 | // ------------------------------------- |
153 | ||
154 | // Creates a break iterator for line breaks. | |
374ca955 | 155 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
156 | BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) |
157 | { | |
374ca955 | 158 | return createInstance(key, UBRK_LINE, status); |
b75a7d8f A |
159 | } |
160 | ||
161 | // ------------------------------------- | |
162 | ||
163 | // Creates a break iterator for character breaks. | |
374ca955 | 164 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
165 | BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) |
166 | { | |
374ca955 | 167 | return createInstance(key, UBRK_CHARACTER, status); |
b75a7d8f A |
168 | } |
169 | ||
170 | // ------------------------------------- | |
171 | ||
172 | // Creates a break iterator for sentence breaks. | |
374ca955 | 173 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
174 | BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) |
175 | { | |
374ca955 | 176 | return createInstance(key, UBRK_SENTENCE, status); |
b75a7d8f A |
177 | } |
178 | ||
179 | // ------------------------------------- | |
180 | ||
181 | // Creates a break iterator for title casing breaks. | |
374ca955 | 182 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
183 | BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) |
184 | { | |
374ca955 | 185 | return createInstance(key, UBRK_TITLE, status); |
b75a7d8f A |
186 | } |
187 | ||
188 | // ------------------------------------- | |
189 | ||
190 | // Gets all the available locales that has localized text boundary data. | |
374ca955 | 191 | const Locale* U_EXPORT2 |
b75a7d8f A |
192 | BreakIterator::getAvailableLocales(int32_t& count) |
193 | { | |
194 | return Locale::getAvailableLocales(count); | |
195 | } | |
196 | ||
b75a7d8f A |
197 | // ------------------------------------------ |
198 | // | |
199 | // Default constructor and destructor | |
200 | // | |
201 | //------------------------------------------- | |
202 | ||
203 | BreakIterator::BreakIterator() | |
2ca993e8 | 204 | : fKeepAll(FALSE) |
b75a7d8f | 205 | { |
374ca955 | 206 | *validLocale = *actualLocale = 0; |
b75a7d8f A |
207 | } |
208 | ||
209 | BreakIterator::~BreakIterator() | |
210 | { | |
211 | } | |
212 | ||
213 | // ------------------------------------------ | |
214 | // | |
215 | // Registration | |
216 | // | |
217 | //------------------------------------------- | |
374ca955 | 218 | #if !UCONFIG_NO_SERVICE |
b75a7d8f | 219 | |
b75a7d8f A |
220 | // ------------------------------------- |
221 | ||
222 | class ICUBreakIteratorFactory : public ICUResourceBundleFactory { | |
4388f060 A |
223 | public: |
224 | virtual ~ICUBreakIteratorFactory(); | |
b75a7d8f | 225 | protected: |
374ca955 A |
226 | virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { |
227 | return BreakIterator::makeInstance(loc, kind, status); | |
228 | } | |
b75a7d8f A |
229 | }; |
230 | ||
4388f060 A |
231 | ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} |
232 | ||
b75a7d8f A |
233 | // ------------------------------------- |
234 | ||
235 | class ICUBreakIteratorService : public ICULocaleService { | |
236 | public: | |
374ca955 A |
237 | ICUBreakIteratorService() |
238 | : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) | |
239 | { | |
240 | UErrorCode status = U_ZERO_ERROR; | |
241 | registerFactory(new ICUBreakIteratorFactory(), status); | |
242 | } | |
46f4442e | 243 | |
4388f060 A |
244 | virtual ~ICUBreakIteratorService(); |
245 | ||
374ca955 A |
246 | virtual UObject* cloneInstance(UObject* instance) const { |
247 | return ((BreakIterator*)instance)->clone(); | |
248 | } | |
46f4442e | 249 | |
374ca955 A |
250 | virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { |
251 | LocaleKey& lkey = (LocaleKey&)key; | |
252 | int32_t kind = lkey.kind(); | |
253 | Locale loc; | |
254 | lkey.currentLocale(loc); | |
255 | return BreakIterator::makeInstance(loc, kind, status); | |
256 | } | |
46f4442e | 257 | |
374ca955 A |
258 | virtual UBool isDefault() const { |
259 | return countFactories() == 1; | |
260 | } | |
b75a7d8f A |
261 | }; |
262 | ||
4388f060 A |
263 | ICUBreakIteratorService::~ICUBreakIteratorService() {} |
264 | ||
b75a7d8f A |
265 | // ------------------------------------- |
266 | ||
374ca955 | 267 | // defined in ucln_cmn.h |
57a6839d | 268 | U_NAMESPACE_END |
374ca955 | 269 | |
57a6839d | 270 | static icu::UInitOnce gInitOnce; |
4388f060 | 271 | static icu::ICULocaleService* gService = NULL; |
46f4442e | 272 | |
57a6839d A |
273 | |
274 | ||
374ca955 | 275 | /** |
46f4442e | 276 | * Release all static memory held by breakiterator. |
374ca955 A |
277 | */ |
278 | U_CDECL_BEGIN | |
279 | static UBool U_CALLCONV breakiterator_cleanup(void) { | |
280 | #if !UCONFIG_NO_SERVICE | |
281 | if (gService) { | |
282 | delete gService; | |
283 | gService = NULL; | |
284 | } | |
57a6839d | 285 | gInitOnce.reset(); |
374ca955 A |
286 | #endif |
287 | return TRUE; | |
288 | } | |
289 | U_CDECL_END | |
290 | U_NAMESPACE_BEGIN | |
291 | ||
57a6839d A |
292 | static void U_CALLCONV |
293 | initService(void) { | |
294 | gService = new ICUBreakIteratorService(); | |
295 | ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); | |
296 | } | |
297 | ||
46f4442e | 298 | static ICULocaleService* |
b75a7d8f A |
299 | getService(void) |
300 | { | |
57a6839d | 301 | umtx_initOnce(gInitOnce, &initService); |
b75a7d8f A |
302 | return gService; |
303 | } | |
304 | ||
57a6839d | 305 | |
b75a7d8f A |
306 | // ------------------------------------- |
307 | ||
46f4442e A |
308 | static inline UBool |
309 | hasService(void) | |
b75a7d8f | 310 | { |
57a6839d | 311 | return !gInitOnce.isReset() && getService() != NULL; |
b75a7d8f A |
312 | } |
313 | ||
314 | // ------------------------------------- | |
315 | ||
374ca955 | 316 | URegistryKey U_EXPORT2 |
46f4442e | 317 | BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) |
b75a7d8f | 318 | { |
46f4442e A |
319 | ICULocaleService *service = getService(); |
320 | if (service == NULL) { | |
321 | status = U_MEMORY_ALLOCATION_ERROR; | |
322 | return NULL; | |
323 | } | |
324 | return service->registerInstance(toAdopt, locale, kind, status); | |
b75a7d8f A |
325 | } |
326 | ||
327 | // ------------------------------------- | |
328 | ||
374ca955 | 329 | UBool U_EXPORT2 |
46f4442e | 330 | BreakIterator::unregister(URegistryKey key, UErrorCode& status) |
b75a7d8f A |
331 | { |
332 | if (U_SUCCESS(status)) { | |
333 | if (hasService()) { | |
334 | return gService->unregister(key, status); | |
335 | } | |
46f4442e | 336 | status = U_MEMORY_ALLOCATION_ERROR; |
b75a7d8f A |
337 | } |
338 | return FALSE; | |
339 | } | |
340 | ||
341 | // ------------------------------------- | |
342 | ||
374ca955 | 343 | StringEnumeration* U_EXPORT2 |
b75a7d8f A |
344 | BreakIterator::getAvailableLocales(void) |
345 | { | |
46f4442e A |
346 | ICULocaleService *service = getService(); |
347 | if (service == NULL) { | |
348 | return NULL; | |
349 | } | |
350 | return service->getAvailableLocales(); | |
b75a7d8f | 351 | } |
374ca955 A |
352 | #endif /* UCONFIG_NO_SERVICE */ |
353 | ||
354 | // ------------------------------------- | |
355 | ||
356 | BreakIterator* | |
73c04bcf | 357 | BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) |
374ca955 A |
358 | { |
359 | if (U_FAILURE(status)) { | |
360 | return NULL; | |
361 | } | |
46f4442e | 362 | |
374ca955 A |
363 | #if !UCONFIG_NO_SERVICE |
364 | if (hasService()) { | |
73c04bcf | 365 | Locale actualLoc(""); |
374ca955 A |
366 | BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); |
367 | // TODO: The way the service code works in ICU 2.8 is that if | |
368 | // there is a real registered break iterator, the actualLoc | |
369 | // will be populated, but if the handleDefault path is taken | |
370 | // (because nothing is registered that can handle the | |
371 | // requested locale) then the actualLoc comes back empty. In | |
372 | // that case, the returned object already has its actual/valid | |
373 | // locale data populated (by makeInstance, which is what | |
374 | // handleDefault calls), so we don't touch it. YES, A COMMENT | |
375 | // THIS LONG is a sign of bad code -- so the action item is to | |
376 | // revisit this in ICU 3.0 and clean it up/fix it/remove it. | |
377 | if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { | |
378 | U_LOCALE_BASED(locBased, *result); | |
379 | locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); | |
380 | } | |
381 | return result; | |
382 | } | |
383 | else | |
384 | #endif | |
385 | { | |
386 | return makeInstance(loc, kind, status); | |
387 | } | |
388 | } | |
b75a7d8f A |
389 | |
390 | // ------------------------------------- | |
b331163b | 391 | enum { kKeyValueLenMax = 32 }; |
b75a7d8f | 392 | |
46f4442e | 393 | BreakIterator* |
b75a7d8f A |
394 | BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) |
395 | { | |
374ca955 A |
396 | |
397 | if (U_FAILURE(status)) { | |
398 | return NULL; | |
399 | } | |
b331163b | 400 | char lbType[kKeyValueLenMax]; |
374ca955 A |
401 | |
402 | BreakIterator *result = NULL; | |
b75a7d8f | 403 | switch (kind) { |
46f4442e | 404 | case UBRK_CHARACTER: |
73c04bcf | 405 | result = BreakIterator::buildInstance(loc, "grapheme", kind, status); |
374ca955 A |
406 | break; |
407 | case UBRK_WORD: | |
73c04bcf | 408 | result = BreakIterator::buildInstance(loc, "word", kind, status); |
374ca955 A |
409 | break; |
410 | case UBRK_LINE: | |
08b89b0a A |
411 | uprv_strcpy(lbType, "line"); |
412 | { | |
b331163b | 413 | char lbKeyValue[kKeyValueLenMax] = {0}; |
08b89b0a | 414 | UErrorCode kvStatus = U_ZERO_ERROR; |
b331163b A |
415 | int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); |
416 | if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { | |
08b89b0a A |
417 | uprv_strcat(lbType, "_"); |
418 | uprv_strcat(lbType, lbKeyValue); | |
419 | } | |
420 | } | |
421 | result = BreakIterator::buildInstance(loc, lbType, kind, status); | |
2ca993e8 A |
422 | if (U_SUCCESS(status) && result != NULL) { |
423 | char lwKeyValue[kKeyValueLenMax] = {0}; | |
424 | UErrorCode kvStatus = U_ZERO_ERROR; | |
425 | int32_t kLen = loc.getKeywordValue("lw", lwKeyValue, kKeyValueLenMax, kvStatus); | |
9f1b1155 | 426 | result->setKeepAll(U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(lwKeyValue,"keepall")==0); |
2ca993e8 | 427 | } |
374ca955 A |
428 | break; |
429 | case UBRK_SENTENCE: | |
73c04bcf | 430 | result = BreakIterator::buildInstance(loc, "sentence", kind, status); |
f3c0d7a5 | 431 | #if !UCONFIG_NO_FILTERED_BREAK_ITERATION |
b331163b A |
432 | { |
433 | char ssKeyValue[kKeyValueLenMax] = {0}; | |
434 | UErrorCode kvStatus = U_ZERO_ERROR; | |
435 | int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); | |
436 | if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { | |
437 | FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); | |
438 | if (U_SUCCESS(kvStatus)) { | |
439 | result = fbiBuilder->build(result, status); | |
440 | delete fbiBuilder; | |
441 | } | |
442 | } | |
443 | } | |
f3c0d7a5 | 444 | #endif |
374ca955 A |
445 | break; |
446 | case UBRK_TITLE: | |
73c04bcf | 447 | result = BreakIterator::buildInstance(loc, "title", kind, status); |
374ca955 | 448 | break; |
b75a7d8f | 449 | default: |
374ca955 | 450 | status = U_ILLEGAL_ARGUMENT_ERROR; |
b75a7d8f | 451 | } |
b75a7d8f | 452 | |
374ca955 A |
453 | if (U_FAILURE(status)) { |
454 | return NULL; | |
455 | } | |
b75a7d8f | 456 | |
374ca955 A |
457 | return result; |
458 | } | |
b75a7d8f | 459 | |
46f4442e | 460 | Locale |
374ca955 A |
461 | BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { |
462 | U_LOCALE_BASED(locBased, *this); | |
463 | return locBased.getLocale(type, status); | |
464 | } | |
465 | ||
466 | const char * | |
467 | BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { | |
468 | U_LOCALE_BASED(locBased, *this); | |
469 | return locBased.getLocaleID(type, status); | |
b75a7d8f A |
470 | } |
471 | ||
57a6839d A |
472 | |
473 | // This implementation of getRuleStatus is a do-nothing stub, here to | |
474 | // provide a default implementation for any derived BreakIterator classes that | |
475 | // do not implement it themselves. | |
476 | int32_t BreakIterator::getRuleStatus() const { | |
477 | return 0; | |
478 | } | |
479 | ||
480 | // This implementation of getRuleStatusVec is a do-nothing stub, here to | |
481 | // provide a default implementation for any derived BreakIterator classes that | |
482 | // do not implement it themselves. | |
483 | int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { | |
484 | if (U_FAILURE(status)) { | |
485 | return 0; | |
486 | } | |
487 | if (capacity < 1) { | |
488 | status = U_BUFFER_OVERFLOW_ERROR; | |
489 | return 1; | |
490 | } | |
491 | *fillInVec = 0; | |
492 | return 1; | |
493 | } | |
494 | ||
495 | BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { | |
496 | U_LOCALE_BASED(locBased, (*this)); | |
497 | locBased.setLocaleIDs(valid, actual); | |
498 | } | |
499 | ||
374ca955 A |
500 | U_NAMESPACE_END |
501 | ||
b75a7d8f A |
502 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
503 | ||
504 | //eof |