2 ************************************************************************************
3 * Copyright (C) 2006-2011, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ************************************************************************************
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_BREAK_ITERATION
15 #include "unicode/uchar.h"
16 #include "unicode/uniset.h"
17 #include "unicode/chariter.h"
18 #include "unicode/ures.h"
19 #include "unicode/udata.h"
20 #include "unicode/putil.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uscript.h"
31 ******************************************************************
// Default constructor of LanguageBreakEngine, the type through which the
// concrete engines in this file are handled. No body statements are visible
// in this listing.
34 LanguageBreakEngine::LanguageBreakEngine() {
// Destructor of LanguageBreakEngine. No body statements are visible in this
// listing.
37 LanguageBreakEngine::~LanguageBreakEngine() {
41 ******************************************************************
// Default constructor of LanguageBreakFactory. No body statements are visible
// in this listing.
44 LanguageBreakFactory::LanguageBreakFactory() {
// Destructor of LanguageBreakFactory. No body statements are visible in this
// listing.
47 LanguageBreakFactory::~LanguageBreakFactory() {
51 ******************************************************************
// Constructs an UnhandledEngine. The UErrorCode parameter is unnamed and
// therefore unused here.
54 UnhandledEngine::UnhandledEngine(UErrorCode
&/*status*/) {
// Visit every slot of the fHandled array; the element count is derived with
// sizeof so it tracks the array's declared size.
// NOTE(review): the loop body is not visible in this listing -- presumably it
// clears each slot; confirm against the full file.
55 for (int32_t i
= 0; i
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0])); ++i
) {
// Destroys an UnhandledEngine by walking every slot of fHandled.
60 UnhandledEngine::~UnhandledEngine() {
61 for (int32_t i
= 0; i
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0])); ++i
) {
// Only slots that were actually populated are acted upon.
// NOTE(review): the statement inside this `if` is not visible in this
// listing -- presumably it deletes the set; confirm against the full file.
62 if (fHandled
[i
] != 0) {
// Reports whether this engine has recorded character `c` for `breakType`.
//
// c          the code point to test.
// breakType  index into fHandled selecting the per-break-type set.
//
// Evaluates to true only when all three hold: breakType is a valid index into
// fHandled, the slot for that break type is non-null, and that set contains c.
69 UnhandledEngine::handles(UChar32 c
, int32_t breakType
) const {
// Bounds check first so the array access below is always in range; the
// null test short-circuits before contains() is called.
70 return (breakType
>= 0 && breakType
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0]))
71 && fHandled
[breakType
] != 0 && fHandled
[breakType
]->contains(c
));
// Moves the position of `text` across a run of characters that belong to the
// fHandled set for the given break type. The foundBreaks stack parameter is
// unnamed, so nothing is pushed onto it by this function.
//
// text  the UText whose current position is read and advanced/retreated.
// The remaining parameters (startPos, endPos, breakType, and whatever selects
// the scan direction) are declared on lines not visible in this listing.
75 UnhandledEngine::findBreaks( UText
*text
,
80 UStack
&/*foundBreaks*/ ) const {
// Ignore break types that do not index into fHandled.
81 if (breakType
>= 0 && breakType
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0]))) {
82 UChar32 c
= utext_current32(text
);
// Backward scan: step to the previous code point while still past startPos
// and the current code point is in the set.
// NOTE(review): fHandled[breakType] is dereferenced here with no null check
// visible in this listing (handles() does test for null) -- confirm that a
// guard exists on the elided lines or that callers guarantee the slot is set.
84 while((int32_t)utext_getNativeIndex(text
) > startPos
&& fHandled
[breakType
]->contains(c
)) {
85 c
= utext_previous32(text
);
// Forward scan: advance while still before endPos and the current code point
// is in the set. The same null-check caveat as above applies.
89 while((int32_t)utext_getNativeIndex(text
) < endPos
&& fHandled
[breakType
]->contains(c
)) {
90 utext_next32(text
); // TODO: recast loop to work with post-increment operations.
91 c
= utext_current32(text
);
// Records that character `c` is to be treated as handled by this engine for
// `breakType`, widening the record to the character's entire script.
//
// c          the code point to record.
// breakType  index into fHandled; out-of-range values are ignored.
99 UnhandledEngine::handleCharacter(UChar32 c
, int32_t breakType
) {
100 if (breakType
>= 0 && breakType
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0]))) {
// Create the set for this break type on first use.
101 if (fHandled
[breakType
] == 0) {
102 fHandled
[breakType
] = new UnicodeSet();
// Allocation-failure check; what happens on failure is on a line not
// visible in this listing (presumably an early return -- confirm).
103 if (fHandled
[breakType
] == 0) {
// Skip the property lookup when the character is already covered.
107 if (!fHandled
[breakType
]->contains(c
)) {
108 UErrorCode status
= U_ZERO_ERROR
;
109 // Apply the entire script of the character.
110 int32_t script
= u_getIntPropertyValue(c
, UCHAR_SCRIPT
);
// NOTE(review): `status` is a local and is not inspected on any line visible
// in this listing, so a failure here appears to go unreported -- confirm.
111 fHandled
[breakType
]->applyIntPropertyValue(UCHAR_SCRIPT
, script
, status
);
117 ******************************************************************
// Constructs an ICULanguageBreakFactory. The UErrorCode parameter is unnamed
// and therefore unused here. Body statements are not visible in this listing.
120 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode
&/*status*/) {
// Destroys an ICULanguageBreakFactory. Body statements are not visible in this
// listing (presumably releases fEngines -- confirm against the full file).
124 ICULanguageBreakFactory::~ICULanguageBreakFactory() {
// Element-deleter callback: treats `obj` as a LanguageBreakEngine and deletes
// it. It is handed to the UStack constructor in getEngineFor() below.
//
// obj  type-erased pointer to the engine to destroy.
132 static void U_CALLCONV
_deleteEngine(void *obj
) {
// Cast back to the engine type before deleting so the delete expression
// operates on a LanguageBreakEngine rather than on void*.
133 delete (const icu::LanguageBreakEngine
*) obj
;
// Finds, or creates and registers, an engine that handles character `c` for
// `breakType`.
//
// c          the code point an engine is needed for.
// breakType  the kind of break iteration being performed.
//
// Visible flow: (1) look through the engines already held in fEngines;
// (2) create the fEngines stack if it does not exist yet; (3) if nothing
// matched, build a new engine with loadEngineFor(); (4) look through fEngines
// once more and push the new engine only if still nothing matches.
// NOTE(review): locking calls, loop headers, declarations of `needsInit` and
// `i`, and the return statements are on lines not visible in this listing.
138 const LanguageBreakEngine
*
139 ICULanguageBreakFactory::getEngineFor(UChar32 c
, int32_t breakType
) {
142 const LanguageBreakEngine
*lbe
= NULL
;
143 UErrorCode status
= U_ZERO_ERROR
;
145 // TODO: The global mutex should not be used.
146 // The global mutex should only be used for short periods.
147 // An ICULanguageBreakFactory-specific mutex should be used.
// Remember whether the engine stack still has to be created.
149 needsInit
= (UBool
)(fEngines
== NULL
);
// First pass over the existing engines, starting from the stack size.
151 i
= fEngines
->size();
153 lbe
= (const LanguageBreakEngine
*)(fEngines
->elementAt(i
));
154 if (lbe
!= NULL
&& lbe
->handles(c
, breakType
)) {
// Create the engine stack; _deleteEngine is supplied as its element deleter.
167 UStack
*engines
= new UStack(_deleteEngine
, NULL
, status
);
// A null result from `new` with an otherwise clean status is reported as an
// out-of-memory error.
168 if (U_SUCCESS(status
) && engines
== NULL
) {
169 status
= U_MEMORY_ALLOCATION_ERROR
;
// The UStack constructor itself reported a failure; the handling is on lines
// not visible in this listing.
171 else if (U_FAILURE(status
)) {
// fEngines is re-tested here; the surrounding statements are not visible
// (presumably the new stack is installed only if none exists -- confirm).
177 if (fEngines
== NULL
) {
186 if (fEngines
== NULL
) {
190 // We didn't find an engine the first time through, or there was no
191 // stack. Create an engine.
192 const LanguageBreakEngine
*newlbe
= loadEngineFor(c
, breakType
);
194 // Now get the lock, and see if someone else has created it in the meantime.
197 i
= fEngines
->size();
199 lbe
= (const LanguageBreakEngine
*)(fEngines
->elementAt(i
));
200 if (lbe
!= NULL
&& lbe
->handles(c
, breakType
)) {
// Nothing matched on the second pass and a new engine was built: register it.
205 if (lbe
== NULL
&& newlbe
!= NULL
) {
206 fEngines
->push((void *)newlbe
, status
);
// Builds a new break engine suited to the script of character `c`.
//
// c          the code point whose script selects the engine.
// breakType  forwarded to loadDictionaryFor().
//
// NOTE(review): the statements that choose between the engine types, the
// failure handling, and the return statements are on lines not visible in
// this listing.
217 const LanguageBreakEngine
*
218 ICULanguageBreakFactory::loadEngineFor(UChar32 c
, int32_t breakType
) {
219 UErrorCode status
= U_ZERO_ERROR
;
// Determine the script of the character; everything below depends on it.
220 UScriptCode code
= uscript_getScript(c
, &status
);
221 if (U_SUCCESS(status
)) {
// Load the dictionary registered for that script.
222 const CompactTrieDictionary
*dict
= loadDictionaryFor(code
, breakType
);
224 const LanguageBreakEngine
*engine
= NULL
;
// One of the script-specific engines is constructed around the dictionary
// (presumably selected by a switch on `code` -- confirm).
227 engine
= new ThaiBreakEngine(dict
, status
);
230 engine
= new KhmerBreakEngine(dict
, status
);
// No engine was produced (unsupported script or failed allocation).
235 if (engine
== NULL
) {
// An engine was allocated but its constructor reported an error.
238 else if (U_FAILURE(status
)) {
// Locates and opens the dictionary data registered for `script` and wraps it
// in a CompactTrieDictionary.
//
// script  the script whose dictionary is wanted; its short name is used as
//         the resource key.
// The break-type parameter is unnamed and therefore unused here.
//
// NOTE(review): declarations of `dictnbuff`, `ext` and `len`, the cleanup
// statements, and the return statements are on lines not visible in this
// listing.
248 const CompactTrieDictionary
*
249 ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script
, int32_t /*breakType*/) {
250 UErrorCode status
= U_ZERO_ERROR
;
251 // Open root from brkitr tree.
255 UResourceBundle
*b
= ures_open(U_ICUDATA_BRKITR
, "", &status
);
// Descend root -> "dictionaries" -> <script short name>. `b` is passed as both
// the source bundle and the fill-in argument on each step.
256 b
= ures_getByKeyWithFallback(b
, "dictionaries", b
, &status
);
257 b
= ures_getByKeyWithFallback(b
, uscript_getShortName(script
), b
, &status
);
// The resource value is the dictionary's file name, e.g. "name.ext".
258 int32_t dictnlength
= 0;
259 const UChar
*dictfname
= ures_getString(b
, &dictnlength
, &status
);
// Reject names that would not fit, with a terminator, in dictnbuff.
260 if (U_SUCCESS(status
) && (size_t)dictnlength
>= sizeof(dictnbuff
)) {
262 status
= U_BUFFER_OVERFLOW_ERROR
;
264 if (U_SUCCESS(status
) && dictfname
) {
// Split the file name at the first '.' (U+002E) into base name and extension.
// NOTE(review): u_strchr returns NULL when no '.' is present; no null guard is
// visible in this listing before the pointer arithmetic below -- confirm one
// exists on the elided lines.
265 UChar
* extStart
=u_strchr(dictfname
, 0x002e);
268 len
= (int)(extStart
-dictfname
);
269 u_UCharsToChars(extStart
+1, ext
, sizeof(ext
)); // nul terminates the buff
270 u_UCharsToChars(dictfname
, dictnbuff
, len
);
272 dictnbuff
[len
]=0; // nul terminate
// NOTE(review): no ures_close(b) is visible in this listing -- confirm the
// bundle is released on every path.
// Open the data item, passing the extension as the type argument and the base
// name as the item name.
275 UDataMemory
*file
= udata_open(U_ICUDATA_BRKITR
, ext
, dictnbuff
, &status
);
276 if (U_SUCCESS(status
)) {
277 const CompactTrieDictionary
*dict
= new CompactTrieDictionary(
// A null result from `new` with a clean status is reported as out-of-memory.
279 if (U_SUCCESS(status
) && dict
== NULL
) {
280 status
= U_MEMORY_ALLOCATION_ERROR
;
// Failure path; the cleanup statements are not visible in this listing.
282 if (U_FAILURE(status
)) {
293 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */