]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
374ca955 | 3 | * Copyright (C) 1997-2004, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ******************************************************************************* | |
6 | * | |
7 | * File TXTBDRY.CPP | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 02/18/97 aliu Converted from OpenClass. Added DONE. | |
13 | * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. | |
14 | ***************************************************************************************** | |
15 | */ | |
16 | ||
17 | // ***************************************************************************** | |
18 | // This file was generated from the java source file BreakIterator.java | |
19 | // ***************************************************************************** | |
20 | ||
21 | #include "unicode/utypes.h" | |
22 | ||
23 | #if !UCONFIG_NO_BREAK_ITERATION | |
24 | ||
25 | #include "unicode/dbbi.h" | |
26 | #include "unicode/brkiter.h" | |
27 | #include "unicode/udata.h" | |
374ca955 A |
28 | #include "unicode/ures.h" |
29 | #include "ucln_cmn.h" | |
b75a7d8f A |
30 | #include "cstring.h" |
31 | #include "mutex.h" | |
32 | #include "iculserv.h" | |
374ca955 A |
33 | #include "locbased.h" |
34 | #include "uresimp.h" | |
b75a7d8f A |
35 | |
36 | // ***************************************************************************** | |
37 | // class BreakIterator | |
38 | // This class implements methods for finding the location of boundaries in text. | |
39 | // Instances of BreakIterator maintain a current position and scan over text | |
40 | // returning the index of characters where boundaries occur. | |
41 | // ***************************************************************************** | |
42 | ||
43 | U_NAMESPACE_BEGIN | |
44 | ||
45 | const int32_t BreakIterator::DONE = (int32_t)-1; | |
46 | ||
47 | // ------------------------------------- | |
48 | ||
b75a7d8f | 49 | BreakIterator* |
374ca955 | 50 | BreakIterator::buildInstance(const Locale& loc, const char *type, UBool dict, UErrorCode &status) |
b75a7d8f | 51 | { |
374ca955 A |
52 | char fnbuff[256]; |
53 | char actualLocale[ULOC_FULLNAME_CAPACITY]; | |
54 | int32_t size; | |
55 | const UChar* brkfname = NULL; | |
56 | UResourceBundle brkRulesStack, brkNameStack; | |
57 | UResourceBundle *brkRules = &brkRulesStack, *brkName = &brkNameStack; | |
58 | BreakIterator *result = NULL; | |
59 | ||
b75a7d8f A |
60 | if (U_FAILURE(status)) |
61 | return NULL; | |
62 | ||
374ca955 A |
63 | ures_initStackObject(brkRules); |
64 | ures_initStackObject(brkName); | |
65 | ||
66 | // Get the locale | |
67 | UResourceBundle *b = ures_open(NULL, loc.getName(), &status); | |
68 | ||
69 | // Get the "boundaries" array. | |
70 | if (U_SUCCESS(status)) { | |
71 | brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); | |
72 | // Get the string object naming the rules file | |
73 | brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); | |
74 | // Get the actual string | |
75 | brkfname = ures_getString(brkName, &size, &status); | |
76 | ||
77 | // Use the string if we found it | |
78 | if (U_SUCCESS(status) && brkfname) { | |
79 | uprv_strncpy(actualLocale, | |
80 | ures_getLocale(brkName, &status), | |
81 | sizeof(actualLocale)/sizeof(actualLocale[0])); | |
82 | u_UCharsToChars(brkfname, fnbuff, size+1); | |
83 | } | |
b75a7d8f A |
84 | } |
85 | ||
374ca955 A |
86 | ures_close(brkRules); |
87 | ures_close(brkName); | |
88 | ||
89 | UDataMemory* file = udata_open(NULL, "brk", fnbuff, &status); | |
b75a7d8f | 90 | if (U_FAILURE(status)) { |
374ca955 | 91 | ures_close(b); |
b75a7d8f A |
92 | return NULL; |
93 | } | |
b75a7d8f | 94 | |
374ca955 A |
95 | // We found the break rules; now see if a dictionary is needed |
96 | if (dict) | |
97 | { | |
98 | UErrorCode localStatus = U_ZERO_ERROR; | |
99 | brkName = &brkNameStack; | |
100 | ures_initStackObject(brkName); | |
101 | brkName = ures_getByKeyWithFallback(b, "BreakDictionaryData", brkName, &localStatus); | |
102 | #if 0 | |
103 | if (U_SUCCESS(localStatus)) { | |
104 | brkfname = ures_getString(&brkname, &size, &localStatus); | |
105 | } | |
106 | #endif | |
107 | if (U_SUCCESS(localStatus)) { | |
108 | #if 0 | |
109 | u_UCharsToChars(brkfname, fnbuff, size); | |
110 | fnbuff[size] = '\0'; | |
111 | #endif | |
112 | result = new DictionaryBasedBreakIterator(file, "thaidict.brk", status); | |
113 | } | |
114 | ures_close(brkName); | |
b75a7d8f | 115 | } |
374ca955 A |
116 | |
117 | // If there is still no result but we haven't had an error, no dictionary, | |
118 | // so make a non-dictionary break iterator | |
119 | if (U_SUCCESS(status) && result == NULL) { | |
b75a7d8f A |
120 | result = new RuleBasedBreakIterator(file, status); |
121 | } | |
374ca955 A |
122 | |
123 | // If there is a result, set the valid locale and actual locale | |
124 | if (U_SUCCESS(status) && result != NULL) { | |
125 | U_LOCALE_BASED(locBased, *result); | |
126 | locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale); | |
b75a7d8f | 127 | } |
374ca955 A |
128 | |
129 | ures_close(b); | |
130 | ||
131 | if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple | |
b75a7d8f | 132 | delete result; |
374ca955 A |
133 | return NULL; |
134 | } | |
135 | ||
136 | if (result == NULL) { | |
137 | udata_close(file); | |
138 | if (U_SUCCESS(status)) { | |
139 | status = U_MEMORY_ALLOCATION_ERROR; | |
140 | } | |
b75a7d8f A |
141 | } |
142 | ||
143 | return result; | |
144 | } | |
145 | ||
374ca955 A |
146 | // Creates a break iterator for word breaks. |
147 | BreakIterator* U_EXPORT2 | |
148 | BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) | |
149 | { | |
150 | return createInstance(key, UBRK_WORD, status); | |
151 | } | |
152 | ||
b75a7d8f A |
153 | // ------------------------------------- |
154 | ||
155 | // Creates a break iterator for line breaks. | |
374ca955 | 156 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
157 | BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) |
158 | { | |
374ca955 | 159 | return createInstance(key, UBRK_LINE, status); |
b75a7d8f A |
160 | } |
161 | ||
162 | // ------------------------------------- | |
163 | ||
164 | // Creates a break iterator for character breaks. | |
374ca955 | 165 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
166 | BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) |
167 | { | |
374ca955 | 168 | return createInstance(key, UBRK_CHARACTER, status); |
b75a7d8f A |
169 | } |
170 | ||
171 | // ------------------------------------- | |
172 | ||
173 | // Creates a break iterator for sentence breaks. | |
374ca955 | 174 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
175 | BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) |
176 | { | |
374ca955 | 177 | return createInstance(key, UBRK_SENTENCE, status); |
b75a7d8f A |
178 | } |
179 | ||
180 | // ------------------------------------- | |
181 | ||
182 | // Creates a break iterator for title casing breaks. | |
374ca955 | 183 | BreakIterator* U_EXPORT2 |
b75a7d8f A |
184 | BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) |
185 | { | |
374ca955 | 186 | return createInstance(key, UBRK_TITLE, status); |
b75a7d8f A |
187 | } |
188 | ||
189 | // ------------------------------------- | |
190 | ||
191 | // Gets all the available locales that has localized text boundary data. | |
374ca955 | 192 | const Locale* U_EXPORT2 |
b75a7d8f A |
193 | BreakIterator::getAvailableLocales(int32_t& count) |
194 | { | |
195 | return Locale::getAvailableLocales(count); | |
196 | } | |
197 | ||
198 | // ------------------------------------- | |
199 | // Gets the objectLocale display name in the default locale language. | |
374ca955 | 200 | UnicodeString& U_EXPORT2 |
b75a7d8f A |
201 | BreakIterator::getDisplayName(const Locale& objectLocale, |
202 | UnicodeString& name) | |
203 | { | |
204 | return objectLocale.getDisplayName(name); | |
205 | } | |
206 | ||
207 | // ------------------------------------- | |
208 | // Gets the objectLocale display name in the displayLocale language. | |
374ca955 | 209 | UnicodeString& U_EXPORT2 |
b75a7d8f A |
210 | BreakIterator::getDisplayName(const Locale& objectLocale, |
211 | const Locale& displayLocale, | |
212 | UnicodeString& name) | |
213 | { | |
214 | return objectLocale.getDisplayName(displayLocale, name); | |
215 | } | |
216 | ||
217 | // ------------------------------------------ | |
218 | // | |
219 | // Default constructor and destructor | |
220 | // | |
221 | //------------------------------------------- | |
222 | ||
223 | BreakIterator::BreakIterator() | |
224 | { | |
225 | fBufferClone = FALSE; | |
374ca955 | 226 | *validLocale = *actualLocale = 0; |
b75a7d8f A |
227 | } |
228 | ||
229 | BreakIterator::~BreakIterator() | |
230 | { | |
231 | } | |
232 | ||
233 | // ------------------------------------------ | |
234 | // | |
235 | // Registration | |
236 | // | |
237 | //------------------------------------------- | |
374ca955 | 238 | #if !UCONFIG_NO_SERVICE |
b75a7d8f A |
239 | |
240 | static ICULocaleService* gService = NULL; | |
241 | ||
242 | // ------------------------------------- | |
243 | ||
244 | class ICUBreakIteratorFactory : public ICUResourceBundleFactory { | |
245 | protected: | |
374ca955 A |
246 | virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { |
247 | return BreakIterator::makeInstance(loc, kind, status); | |
248 | } | |
b75a7d8f A |
249 | }; |
250 | ||
251 | // ------------------------------------- | |
252 | ||
253 | class ICUBreakIteratorService : public ICULocaleService { | |
254 | public: | |
374ca955 A |
255 | ICUBreakIteratorService() |
256 | : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) | |
257 | { | |
258 | UErrorCode status = U_ZERO_ERROR; | |
259 | registerFactory(new ICUBreakIteratorFactory(), status); | |
260 | } | |
261 | ||
262 | virtual UObject* cloneInstance(UObject* instance) const { | |
263 | return ((BreakIterator*)instance)->clone(); | |
264 | } | |
265 | ||
266 | virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { | |
267 | LocaleKey& lkey = (LocaleKey&)key; | |
268 | int32_t kind = lkey.kind(); | |
269 | Locale loc; | |
270 | lkey.currentLocale(loc); | |
271 | return BreakIterator::makeInstance(loc, kind, status); | |
272 | } | |
273 | ||
274 | virtual UBool isDefault() const { | |
275 | return countFactories() == 1; | |
276 | } | |
b75a7d8f A |
277 | }; |
278 | ||
279 | // ------------------------------------- | |
280 | ||
374ca955 A |
281 | U_NAMESPACE_END |
282 | ||
283 | // defined in ucln_cmn.h | |
284 | ||
285 | /** | |
286 | * Release all static memory held by breakiterator. | |
287 | */ | |
288 | U_CDECL_BEGIN | |
289 | static UBool U_CALLCONV breakiterator_cleanup(void) { | |
290 | #if !UCONFIG_NO_SERVICE | |
291 | if (gService) { | |
292 | delete gService; | |
293 | gService = NULL; | |
294 | } | |
295 | #endif | |
296 | return TRUE; | |
297 | } | |
298 | U_CDECL_END | |
299 | U_NAMESPACE_BEGIN | |
300 | ||
b75a7d8f A |
301 | static ICULocaleService* |
302 | getService(void) | |
303 | { | |
304 | UBool needsInit; | |
305 | umtx_lock(NULL); | |
306 | needsInit = (UBool)(gService == NULL); | |
307 | umtx_unlock(NULL); | |
308 | ||
309 | if (needsInit) { | |
310 | ICULocaleService *tService = new ICUBreakIteratorService(); | |
311 | umtx_lock(NULL); | |
312 | if (gService == NULL) { | |
313 | gService = tService; | |
314 | tService = NULL; | |
374ca955 | 315 | ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); |
b75a7d8f A |
316 | } |
317 | umtx_unlock(NULL); | |
318 | delete tService; | |
319 | } | |
320 | return gService; | |
321 | } | |
322 | ||
323 | // ------------------------------------- | |
324 | ||
325 | static UBool | |
326 | hasService(void) | |
327 | { | |
374ca955 A |
328 | Mutex mutex; |
329 | return gService != NULL; | |
b75a7d8f A |
330 | } |
331 | ||
332 | // ------------------------------------- | |
333 | ||
374ca955 | 334 | URegistryKey U_EXPORT2 |
b75a7d8f A |
335 | BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) |
336 | { | |
374ca955 | 337 | return getService()->registerInstance(toAdopt, locale, kind, status); |
b75a7d8f A |
338 | } |
339 | ||
340 | // ------------------------------------- | |
341 | ||
374ca955 | 342 | UBool U_EXPORT2 |
b75a7d8f A |
343 | BreakIterator::unregister(URegistryKey key, UErrorCode& status) |
344 | { | |
345 | if (U_SUCCESS(status)) { | |
346 | if (hasService()) { | |
347 | return gService->unregister(key, status); | |
348 | } | |
349 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
350 | } | |
351 | return FALSE; | |
352 | } | |
353 | ||
354 | // ------------------------------------- | |
355 | ||
374ca955 | 356 | StringEnumeration* U_EXPORT2 |
b75a7d8f A |
357 | BreakIterator::getAvailableLocales(void) |
358 | { | |
359 | return getService()->getAvailableLocales(); | |
360 | } | |
374ca955 A |
361 | #endif /* UCONFIG_NO_SERVICE */ |
362 | ||
363 | // ------------------------------------- | |
364 | ||
365 | BreakIterator* | |
366 | BreakIterator::createInstance(const Locale& loc, UBreakIteratorType kind, UErrorCode& status) | |
367 | { | |
368 | if (U_FAILURE(status)) { | |
369 | return NULL; | |
370 | } | |
371 | ||
372 | u_init(&status); | |
373 | #if !UCONFIG_NO_SERVICE | |
374 | if (hasService()) { | |
375 | Locale actualLoc; | |
376 | BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); | |
377 | // TODO: The way the service code works in ICU 2.8 is that if | |
378 | // there is a real registered break iterator, the actualLoc | |
379 | // will be populated, but if the handleDefault path is taken | |
380 | // (because nothing is registered that can handle the | |
381 | // requested locale) then the actualLoc comes back empty. In | |
382 | // that case, the returned object already has its actual/valid | |
383 | // locale data populated (by makeInstance, which is what | |
384 | // handleDefault calls), so we don't touch it. YES, A COMMENT | |
385 | // THIS LONG is a sign of bad code -- so the action item is to | |
386 | // revisit this in ICU 3.0 and clean it up/fix it/remove it. | |
387 | if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { | |
388 | U_LOCALE_BASED(locBased, *result); | |
389 | locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); | |
390 | } | |
391 | return result; | |
392 | } | |
393 | else | |
394 | #endif | |
395 | { | |
396 | return makeInstance(loc, kind, status); | |
397 | } | |
398 | } | |
b75a7d8f A |
399 | |
400 | // ------------------------------------- | |
401 | ||
402 | BreakIterator* | |
403 | BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) | |
404 | { | |
374ca955 A |
405 | |
406 | if (U_FAILURE(status)) { | |
407 | return NULL; | |
408 | } | |
409 | ||
410 | BreakIterator *result = NULL; | |
b75a7d8f | 411 | switch (kind) { |
374ca955 A |
412 | case UBRK_CHARACTER: |
413 | result = BreakIterator::buildInstance(loc, "grapheme", FALSE, status); | |
414 | break; | |
415 | case UBRK_WORD: | |
416 | result = BreakIterator::buildInstance(loc, "word", TRUE, status); | |
417 | break; | |
418 | case UBRK_LINE: | |
419 | result = BreakIterator::buildInstance(loc, "line", TRUE, status); | |
420 | break; | |
421 | case UBRK_SENTENCE: | |
422 | result = BreakIterator::buildInstance(loc, "sentence", FALSE, status); | |
423 | break; | |
424 | case UBRK_TITLE: | |
425 | result = BreakIterator::buildInstance(loc, "title", FALSE, status); | |
426 | break; | |
b75a7d8f | 427 | default: |
374ca955 | 428 | status = U_ILLEGAL_ARGUMENT_ERROR; |
b75a7d8f | 429 | } |
b75a7d8f | 430 | |
374ca955 A |
431 | if (U_FAILURE(status)) { |
432 | return NULL; | |
433 | } | |
b75a7d8f | 434 | |
374ca955 A |
435 | return result; |
436 | } | |
b75a7d8f | 437 | |
374ca955 A |
438 | Locale |
439 | BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { | |
440 | U_LOCALE_BASED(locBased, *this); | |
441 | return locBased.getLocale(type, status); | |
442 | } | |
443 | ||
444 | const char * | |
445 | BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { | |
446 | U_LOCALE_BASED(locBased, *this); | |
447 | return locBased.getLocaleID(type, status); | |
b75a7d8f A |
448 | } |
449 | ||
374ca955 A |
450 | U_NAMESPACE_END |
451 | ||
b75a7d8f A |
452 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
453 | ||
454 | //eof |