2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 *******************************************************************************
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_BREAK_ITERATION
15 #include "unicode/uchar.h"
16 #include "unicode/uniset.h"
17 #include "unicode/chariter.h"
18 #include "unicode/ures.h"
19 #include "unicode/udata.h"
20 #include "unicode/putil.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uscript.h"
31 ******************************************************************
34 LanguageBreakEngine::LanguageBreakEngine() {
37 LanguageBreakEngine::~LanguageBreakEngine() {
41 ******************************************************************
44 LanguageBreakFactory::LanguageBreakFactory() {
47 LanguageBreakFactory::~LanguageBreakFactory() {
51 ******************************************************************
54 UnhandledEngine::UnhandledEngine(UErrorCode
&/*status*/) {
55 for (int32_t i
= 0; i
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0])); ++i
) {
60 UnhandledEngine::~UnhandledEngine() {
61 for (int32_t i
= 0; i
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0])); ++i
) {
62 if (fHandled
[i
] != 0) {
69 UnhandledEngine::handles(UChar32 c
, int32_t breakType
) const {
70 return (breakType
>= 0 && breakType
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0]))
71 && fHandled
[breakType
] != 0 && fHandled
[breakType
]->contains(c
));
75 UnhandledEngine::findBreaks( UText
*text
,
80 UStack
&/*foundBreaks*/ ) const {
81 if (breakType
>= 0 && breakType
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0]))) {
82 UChar32 c
= utext_current32(text
);
84 while((int32_t)utext_getNativeIndex(text
) > startPos
&& fHandled
[breakType
]->contains(c
)) {
85 c
= utext_previous32(text
);
89 while((int32_t)utext_getNativeIndex(text
) < endPos
&& fHandled
[breakType
]->contains(c
)) {
90 utext_next32(text
); // TODO: recast loop to work with post-increment operations.
91 c
= utext_current32(text
);
99 UnhandledEngine::handleCharacter(UChar32 c
, int32_t breakType
) {
100 if (breakType
>= 0 && breakType
< (int32_t)(sizeof(fHandled
)/sizeof(fHandled
[0]))) {
101 if (fHandled
[breakType
] == 0) {
102 fHandled
[breakType
] = new UnicodeSet();
103 if (fHandled
[breakType
] == 0) {
107 if (!fHandled
[breakType
]->contains(c
)) {
108 UErrorCode status
= U_ZERO_ERROR
;
109 // Apply the entire script of the character.
110 int32_t script
= u_getIntPropertyValue(c
, UCHAR_SCRIPT
);
111 fHandled
[breakType
]->applyIntPropertyValue(UCHAR_SCRIPT
, script
, status
);
117 ******************************************************************
120 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode
&/*status*/) {
124 ICULanguageBreakFactory::~ICULanguageBreakFactory() {
132 static void U_CALLCONV
_deleteEngine(void *obj
) {
133 delete (const LanguageBreakEngine
*) obj
;
138 const LanguageBreakEngine
*
139 ICULanguageBreakFactory::getEngineFor(UChar32 c
, int32_t breakType
) {
142 const LanguageBreakEngine
*lbe
= NULL
;
143 UErrorCode status
= U_ZERO_ERROR
;
146 needsInit
= (UBool
)(fEngines
== NULL
);
148 i
= fEngines
->size();
150 lbe
= (const LanguageBreakEngine
*)(fEngines
->elementAt(i
));
151 if (lbe
!= NULL
&& lbe
->handles(c
, breakType
)) {
164 UStack
*engines
= new UStack(_deleteEngine
, NULL
, status
);
165 if (U_SUCCESS(status
) && engines
== NULL
) {
166 status
= U_MEMORY_ALLOCATION_ERROR
;
168 else if (U_FAILURE(status
)) {
174 if (fEngines
== NULL
) {
183 if (fEngines
== NULL
) {
187 // We didn't find an engine the first time through, or there was no
188 // stack. Create an engine.
189 const LanguageBreakEngine
*newlbe
= loadEngineFor(c
, breakType
);
191 // Now get the lock, and see if someone else has created it in the
194 i
= fEngines
->size();
196 lbe
= (const LanguageBreakEngine
*)(fEngines
->elementAt(i
));
197 if (lbe
!= NULL
&& lbe
->handles(c
, breakType
)) {
202 if (lbe
== NULL
&& newlbe
!= NULL
) {
203 fEngines
->push((void *)newlbe
, status
);
214 const LanguageBreakEngine
*
215 ICULanguageBreakFactory::loadEngineFor(UChar32 c
, int32_t breakType
) {
216 UErrorCode status
= U_ZERO_ERROR
;
217 UScriptCode code
= uscript_getScript(c
, &status
);
218 if (U_SUCCESS(status
)) {
219 const CompactTrieDictionary
*dict
= loadDictionaryFor(code
, breakType
);
221 const LanguageBreakEngine
*engine
= NULL
;
224 engine
= new ThaiBreakEngine(dict
, status
);
229 if (engine
== NULL
) {
232 else if (U_FAILURE(status
)) {
242 const CompactTrieDictionary
*
243 ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script
, int32_t breakType
) {
244 UErrorCode status
= U_ZERO_ERROR
;
245 // Open root from brkitr tree.
249 UResourceBundle
*b
= ures_open(U_ICUDATA_BRKITR
, "", &status
);
250 b
= ures_getByKeyWithFallback(b
, "dictionaries", b
, &status
);
251 b
= ures_getByKeyWithFallback(b
, uscript_getShortName(script
), b
, &status
);
252 int32_t dictnlength
= 0;
253 const UChar
*dictfname
= ures_getString(b
, &dictnlength
, &status
);
254 if (U_SUCCESS(status
) && (size_t)dictnlength
>= sizeof(dictnbuff
)) {
256 status
= U_BUFFER_OVERFLOW_ERROR
;
258 if (U_SUCCESS(status
) && dictfname
) {
259 UChar
* extStart
=u_strchr(dictfname
, 0x002e);
262 len
= extStart
-dictfname
;
263 u_UCharsToChars(extStart
+1, ext
, sizeof(ext
)); // nul terminates the buff
264 u_UCharsToChars(dictfname
, dictnbuff
, len
);
266 dictnbuff
[len
]=0; // nul terminate
269 UDataMemory
*file
= udata_open(U_ICUDATA_BRKITR
, ext
, dictnbuff
, &status
);
270 if (U_SUCCESS(status
)) {
271 const CompactTrieDictionary
*dict
= new CompactTrieDictionary(
273 if (U_SUCCESS(status
) && dict
== NULL
) {
274 status
= U_MEMORY_ALLOCATION_ERROR
;
276 if (U_FAILURE(status
)) {
287 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */