]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
4388f060 | 3 | * Copyright (C) 1997-2011, International Business Machines Corporation and |
729e4ab9 | 4 | * others. All Rights Reserved. |
b75a7d8f A |
5 | ******************************************************************************* |
6 | * | |
7 | * File TXTBDRY.CPP | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 02/18/97 aliu Converted from OpenClass. Added DONE. | |
13 | * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. | |
14 | ***************************************************************************************** | |
15 | */ | |
16 | ||
17 | // ***************************************************************************** | |
18 | // This file was generated from the java source file BreakIterator.java | |
19 | // ***************************************************************************** | |
20 | ||
21 | #include "unicode/utypes.h" | |
22 | ||
23 | #if !UCONFIG_NO_BREAK_ITERATION | |
24 | ||
73c04bcf | 25 | #include "unicode/rbbi.h" |
b75a7d8f A |
26 | #include "unicode/brkiter.h" |
27 | #include "unicode/udata.h" | |
374ca955 | 28 | #include "unicode/ures.h" |
73c04bcf | 29 | #include "unicode/ustring.h" |
374ca955 | 30 | #include "ucln_cmn.h" |
b75a7d8f | 31 | #include "cstring.h" |
46f4442e | 32 | #include "umutex.h" |
73c04bcf | 33 | #include "servloc.h" |
374ca955 A |
34 | #include "locbased.h" |
35 | #include "uresimp.h" | |
73c04bcf A |
36 | #include "uassert.h" |
37 | #include "ubrkimpl.h" | |
b75a7d8f A |
38 | |
39 | // ***************************************************************************** | |
40 | // class BreakIterator | |
41 | // This class implements methods for finding the location of boundaries in text. | |
42 | // Instances of BreakIterator maintain a current position and scan over text | |
43 | // returning the index of characters where boundaries occur. | |
44 | // ***************************************************************************** | |
45 | ||
46 | U_NAMESPACE_BEGIN | |
47 | ||
b75a7d8f A |
48 | // ------------------------------------- |
49 | ||
b75a7d8f | 50 | BreakIterator* |
73c04bcf | 51 | BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status) |
b75a7d8f | 52 | { |
374ca955 | 53 | char fnbuff[256]; |
73c04bcf | 54 | char ext[4]={'\0'}; |
374ca955 A |
55 | char actualLocale[ULOC_FULLNAME_CAPACITY]; |
56 | int32_t size; | |
57 | const UChar* brkfname = NULL; | |
73c04bcf A |
58 | UResourceBundle brkRulesStack; |
59 | UResourceBundle brkNameStack; | |
60 | UResourceBundle *brkRules = &brkRulesStack; | |
61 | UResourceBundle *brkName = &brkNameStack; | |
62 | RuleBasedBreakIterator *result = NULL; | |
46f4442e | 63 | |
b75a7d8f A |
64 | if (U_FAILURE(status)) |
65 | return NULL; | |
66 | ||
374ca955 A |
67 | ures_initStackObject(brkRules); |
68 | ures_initStackObject(brkName); | |
69 | ||
70 | // Get the locale | |
73c04bcf A |
71 | UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status); |
72 | /* this is a hack for now. Should be fixed when the data is fetched from | |
73 | brk_index.txt */ | |
74 | if(status==U_USING_DEFAULT_WARNING){ | |
75 | status=U_ZERO_ERROR; | |
76 | ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status); | |
77 | } | |
374ca955 A |
78 | |
79 | // Get the "boundaries" array. | |
80 | if (U_SUCCESS(status)) { | |
81 | brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); | |
82 | // Get the string object naming the rules file | |
83 | brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); | |
84 | // Get the actual string | |
85 | brkfname = ures_getString(brkName, &size, &status); | |
73c04bcf A |
86 | U_ASSERT((size_t)size<sizeof(fnbuff)); |
87 | if ((size_t)size>=sizeof(fnbuff)) { | |
88 | size=0; | |
89 | if (U_SUCCESS(status)) { | |
90 | status = U_BUFFER_OVERFLOW_ERROR; | |
91 | } | |
92 | } | |
374ca955 A |
93 | |
94 | // Use the string if we found it | |
95 | if (U_SUCCESS(status) && brkfname) { | |
96 | uprv_strncpy(actualLocale, | |
4388f060 | 97 | ures_getLocaleInternal(brkName, &status), |
374ca955 | 98 | sizeof(actualLocale)/sizeof(actualLocale[0])); |
46f4442e | 99 | |
73c04bcf A |
100 | UChar* extStart=u_strchr(brkfname, 0x002e); |
101 | int len = 0; | |
102 | if(extStart!=NULL){ | |
729e4ab9 | 103 | len = (int)(extStart-brkfname); |
73c04bcf A |
104 | u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff |
105 | u_UCharsToChars(brkfname, fnbuff, len); | |
106 | } | |
107 | fnbuff[len]=0; // nul terminate | |
374ca955 | 108 | } |
b75a7d8f A |
109 | } |
110 | ||
374ca955 A |
111 | ures_close(brkRules); |
112 | ures_close(brkName); | |
46f4442e | 113 | |
73c04bcf | 114 | UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); |
b75a7d8f | 115 | if (U_FAILURE(status)) { |
374ca955 | 116 | ures_close(b); |
b75a7d8f A |
117 | return NULL; |
118 | } | |
b75a7d8f | 119 | |
73c04bcf A |
120 | // Create a RuleBasedBreakIterator |
121 | result = new RuleBasedBreakIterator(file, status); | |
374ca955 | 122 | |
73c04bcf | 123 | // If there is a result, set the valid locale and actual locale, and the kind |
374ca955 | 124 | if (U_SUCCESS(status) && result != NULL) { |
46f4442e | 125 | U_LOCALE_BASED(locBased, *(BreakIterator*)result); |
374ca955 | 126 | locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); |
73c04bcf | 127 | result->setBreakType(kind); |
b75a7d8f | 128 | } |
374ca955 A |
129 | |
130 | ures_close(b); | |
46f4442e | 131 | |
374ca955 | 132 | if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple |
b75a7d8f | 133 | delete result; |
374ca955 A |
134 | return NULL; |
135 | } | |
136 | ||
137 | if (result == NULL) { | |
138 | udata_close(file); | |
139 | if (U_SUCCESS(status)) { | |
140 | status = U_MEMORY_ALLOCATION_ERROR; | |
141 | } | |
b75a7d8f A |
142 | } |
143 | ||
144 | return result; | |
145 | } | |
146 | ||
374ca955 A |
147 | // Creates a break iterator for word breaks. |
148 | BreakIterator* U_EXPORT2 | |
149 | BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) | |
150 | { | |
151 | return createInstance(key, UBRK_WORD, status); | |
152 | } | |
153 | ||
b75a7d8f A |
154 | // ------------------------------------- |
155 | ||
156 | // Creates a break iterator for line breaks. | |
374ca955 | 157 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
158 | BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) |
159 | { | |
374ca955 | 160 | return createInstance(key, UBRK_LINE, status); |
b75a7d8f A |
161 | } |
162 | ||
163 | // ------------------------------------- | |
164 | ||
165 | // Creates a break iterator for character breaks. | |
374ca955 | 166 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
167 | BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) |
168 | { | |
374ca955 | 169 | return createInstance(key, UBRK_CHARACTER, status); |
b75a7d8f A |
170 | } |
171 | ||
172 | // ------------------------------------- | |
173 | ||
174 | // Creates a break iterator for sentence breaks. | |
374ca955 | 175 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
176 | BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) |
177 | { | |
374ca955 | 178 | return createInstance(key, UBRK_SENTENCE, status); |
b75a7d8f A |
179 | } |
180 | ||
181 | // ------------------------------------- | |
182 | ||
183 | // Creates a break iterator for title casing breaks. | |
374ca955 | 184 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
185 | BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) |
186 | { | |
374ca955 | 187 | return createInstance(key, UBRK_TITLE, status); |
b75a7d8f A |
188 | } |
189 | ||
190 | // ------------------------------------- | |
191 | ||
192 | // Gets all the available locales that has localized text boundary data. | |
374ca955 | 193 | const Locale* U_EXPORT2 |
b75a7d8f A |
194 | BreakIterator::getAvailableLocales(int32_t& count) |
195 | { | |
196 | return Locale::getAvailableLocales(count); | |
197 | } | |
198 | ||
b75a7d8f A |
199 | // ------------------------------------------ |
200 | // | |
201 | // Default constructor and destructor | |
202 | // | |
203 | //------------------------------------------- | |
204 | ||
205 | BreakIterator::BreakIterator() | |
206 | { | |
207 | fBufferClone = FALSE; | |
374ca955 | 208 | *validLocale = *actualLocale = 0; |
b75a7d8f A |
209 | } |
210 | ||
211 | BreakIterator::~BreakIterator() | |
212 | { | |
213 | } | |
214 | ||
215 | // ------------------------------------------ | |
216 | // | |
217 | // Registration | |
218 | // | |
219 | //------------------------------------------- | |
374ca955 | 220 | #if !UCONFIG_NO_SERVICE |
b75a7d8f | 221 | |
b75a7d8f A |
222 | // ------------------------------------- |
223 | ||
224 | class ICUBreakIteratorFactory : public ICUResourceBundleFactory { | |
4388f060 A |
225 | public: |
226 | virtual ~ICUBreakIteratorFactory(); | |
b75a7d8f | 227 | protected: |
374ca955 A |
228 | virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { |
229 | return BreakIterator::makeInstance(loc, kind, status); | |
230 | } | |
b75a7d8f A |
231 | }; |
232 | ||
4388f060 A |
233 | ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} |
234 | ||
b75a7d8f A |
235 | // ------------------------------------- |
236 | ||
237 | class ICUBreakIteratorService : public ICULocaleService { | |
238 | public: | |
374ca955 A |
239 | ICUBreakIteratorService() |
240 | : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) | |
241 | { | |
242 | UErrorCode status = U_ZERO_ERROR; | |
243 | registerFactory(new ICUBreakIteratorFactory(), status); | |
244 | } | |
46f4442e | 245 | |
4388f060 A |
246 | virtual ~ICUBreakIteratorService(); |
247 | ||
374ca955 A |
248 | virtual UObject* cloneInstance(UObject* instance) const { |
249 | return ((BreakIterator*)instance)->clone(); | |
250 | } | |
46f4442e | 251 | |
374ca955 A |
252 | virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { |
253 | LocaleKey& lkey = (LocaleKey&)key; | |
254 | int32_t kind = lkey.kind(); | |
255 | Locale loc; | |
256 | lkey.currentLocale(loc); | |
257 | return BreakIterator::makeInstance(loc, kind, status); | |
258 | } | |
46f4442e | 259 | |
374ca955 A |
260 | virtual UBool isDefault() const { |
261 | return countFactories() == 1; | |
262 | } | |
b75a7d8f A |
263 | }; |
264 | ||
4388f060 A |
265 | ICUBreakIteratorService::~ICUBreakIteratorService() {} |
266 | ||
b75a7d8f A |
267 | // ------------------------------------- |
268 | ||
374ca955 A |
269 | U_NAMESPACE_END |
270 | ||
271 | // defined in ucln_cmn.h | |
272 | ||
4388f060 | 273 | static icu::ICULocaleService* gService = NULL; |
46f4442e | 274 | |
374ca955 | 275 | /** |
46f4442e | 276 | * Release all static memory held by breakiterator. |
374ca955 A |
277 | */ |
278 | U_CDECL_BEGIN | |
279 | static UBool U_CALLCONV breakiterator_cleanup(void) { | |
280 | #if !UCONFIG_NO_SERVICE | |
281 | if (gService) { | |
282 | delete gService; | |
283 | gService = NULL; | |
284 | } | |
285 | #endif | |
286 | return TRUE; | |
287 | } | |
288 | U_CDECL_END | |
289 | U_NAMESPACE_BEGIN | |
290 | ||
46f4442e | 291 | static ICULocaleService* |
b75a7d8f A |
292 | getService(void) |
293 | { | |
294 | UBool needsInit; | |
46f4442e A |
295 | UMTX_CHECK(NULL, (UBool)(gService == NULL), needsInit); |
296 | ||
b75a7d8f A |
297 | if (needsInit) { |
298 | ICULocaleService *tService = new ICUBreakIteratorService(); | |
299 | umtx_lock(NULL); | |
300 | if (gService == NULL) { | |
301 | gService = tService; | |
302 | tService = NULL; | |
374ca955 | 303 | ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); |
b75a7d8f A |
304 | } |
305 | umtx_unlock(NULL); | |
306 | delete tService; | |
307 | } | |
308 | return gService; | |
309 | } | |
310 | ||
311 | // ------------------------------------- | |
312 | ||
46f4442e A |
313 | static inline UBool |
314 | hasService(void) | |
b75a7d8f | 315 | { |
46f4442e A |
316 | UBool retVal; |
317 | UMTX_CHECK(NULL, gService != NULL, retVal); | |
318 | return retVal; | |
b75a7d8f A |
319 | } |
320 | ||
321 | // ------------------------------------- | |
322 | ||
374ca955 | 323 | URegistryKey U_EXPORT2 |
46f4442e | 324 | BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) |
b75a7d8f | 325 | { |
46f4442e A |
326 | ICULocaleService *service = getService(); |
327 | if (service == NULL) { | |
328 | status = U_MEMORY_ALLOCATION_ERROR; | |
329 | return NULL; | |
330 | } | |
331 | return service->registerInstance(toAdopt, locale, kind, status); | |
b75a7d8f A |
332 | } |
333 | ||
334 | // ------------------------------------- | |
335 | ||
374ca955 | 336 | UBool U_EXPORT2 |
46f4442e | 337 | BreakIterator::unregister(URegistryKey key, UErrorCode& status) |
b75a7d8f A |
338 | { |
339 | if (U_SUCCESS(status)) { | |
340 | if (hasService()) { | |
341 | return gService->unregister(key, status); | |
342 | } | |
46f4442e | 343 | status = U_MEMORY_ALLOCATION_ERROR; |
b75a7d8f A |
344 | } |
345 | return FALSE; | |
346 | } | |
347 | ||
348 | // ------------------------------------- | |
349 | ||
374ca955 | 350 | StringEnumeration* U_EXPORT2 |
b75a7d8f A |
351 | BreakIterator::getAvailableLocales(void) |
352 | { | |
46f4442e A |
353 | ICULocaleService *service = getService(); |
354 | if (service == NULL) { | |
355 | return NULL; | |
356 | } | |
357 | return service->getAvailableLocales(); | |
b75a7d8f | 358 | } |
374ca955 A |
359 | #endif /* UCONFIG_NO_SERVICE */ |
360 | ||
361 | // ------------------------------------- | |
362 | ||
363 | BreakIterator* | |
73c04bcf | 364 | BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) |
374ca955 A |
365 | { |
366 | if (U_FAILURE(status)) { | |
367 | return NULL; | |
368 | } | |
46f4442e | 369 | |
374ca955 A |
370 | #if !UCONFIG_NO_SERVICE |
371 | if (hasService()) { | |
73c04bcf | 372 | Locale actualLoc(""); |
374ca955 A |
373 | BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); |
374 | // TODO: The way the service code works in ICU 2.8 is that if | |
375 | // there is a real registered break iterator, the actualLoc | |
376 | // will be populated, but if the handleDefault path is taken | |
377 | // (because nothing is registered that can handle the | |
378 | // requested locale) then the actualLoc comes back empty. In | |
379 | // that case, the returned object already has its actual/valid | |
380 | // locale data populated (by makeInstance, which is what | |
381 | // handleDefault calls), so we don't touch it. YES, A COMMENT | |
382 | // THIS LONG is a sign of bad code -- so the action item is to | |
383 | // revisit this in ICU 3.0 and clean it up/fix it/remove it. | |
384 | if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { | |
385 | U_LOCALE_BASED(locBased, *result); | |
386 | locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); | |
387 | } | |
388 | return result; | |
389 | } | |
390 | else | |
391 | #endif | |
392 | { | |
393 | return makeInstance(loc, kind, status); | |
394 | } | |
395 | } | |
b75a7d8f A |
396 | |
397 | // ------------------------------------- | |
398 | ||
46f4442e | 399 | BreakIterator* |
b75a7d8f A |
400 | BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) |
401 | { | |
374ca955 A |
402 | |
403 | if (U_FAILURE(status)) { | |
404 | return NULL; | |
405 | } | |
406 | ||
407 | BreakIterator *result = NULL; | |
b75a7d8f | 408 | switch (kind) { |
46f4442e | 409 | case UBRK_CHARACTER: |
73c04bcf | 410 | result = BreakIterator::buildInstance(loc, "grapheme", kind, status); |
374ca955 A |
411 | break; |
412 | case UBRK_WORD: | |
73c04bcf | 413 | result = BreakIterator::buildInstance(loc, "word", kind, status); |
374ca955 A |
414 | break; |
415 | case UBRK_LINE: | |
73c04bcf | 416 | result = BreakIterator::buildInstance(loc, "line", kind, status); |
374ca955 A |
417 | break; |
418 | case UBRK_SENTENCE: | |
73c04bcf | 419 | result = BreakIterator::buildInstance(loc, "sentence", kind, status); |
374ca955 A |
420 | break; |
421 | case UBRK_TITLE: | |
73c04bcf | 422 | result = BreakIterator::buildInstance(loc, "title", kind, status); |
374ca955 | 423 | break; |
b75a7d8f | 424 | default: |
374ca955 | 425 | status = U_ILLEGAL_ARGUMENT_ERROR; |
b75a7d8f | 426 | } |
b75a7d8f | 427 | |
374ca955 A |
428 | if (U_FAILURE(status)) { |
429 | return NULL; | |
430 | } | |
b75a7d8f | 431 | |
374ca955 A |
432 | return result; |
433 | } | |
b75a7d8f | 434 | |
46f4442e | 435 | Locale |
374ca955 A |
436 | BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { |
437 | U_LOCALE_BASED(locBased, *this); | |
438 | return locBased.getLocale(type, status); | |
439 | } | |
440 | ||
441 | const char * | |
442 | BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { | |
443 | U_LOCALE_BASED(locBased, *this); | |
444 | return locBased.getLocaleID(type, status); | |
b75a7d8f A |
445 | } |
446 | ||
374ca955 A |
447 | U_NAMESPACE_END |
448 | ||
b75a7d8f A |
449 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
450 | ||
451 | //eof |