]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/brkeng.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / common / brkeng.cpp
1 /**
2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_BREAK_ITERATION
11
12 #include "brkeng.h"
13 #include "dictbe.h"
14 #include "triedict.h"
15 #include "unicode/uchar.h"
16 #include "unicode/uniset.h"
17 #include "unicode/chariter.h"
18 #include "unicode/ures.h"
19 #include "unicode/udata.h"
20 #include "unicode/putil.h"
21 #include "unicode/ustring.h"
22 #include "unicode/uscript.h"
23 #include "uvector.h"
24 #include "mutex.h"
25 #include "uresimp.h"
26 #include "ubrkimpl.h"
27
28 U_NAMESPACE_BEGIN
29
30 /*
31 ******************************************************************
32 */
33
34 LanguageBreakEngine::LanguageBreakEngine() {
35 }
36
37 LanguageBreakEngine::~LanguageBreakEngine() {
38 }
39
40 /*
41 ******************************************************************
42 */
43
44 LanguageBreakFactory::LanguageBreakFactory() {
45 }
46
47 LanguageBreakFactory::~LanguageBreakFactory() {
48 }
49
50 /*
51 ******************************************************************
52 */
53
54 UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
55 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
56 fHandled[i] = 0;
57 }
58 }
59
60 UnhandledEngine::~UnhandledEngine() {
61 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
62 if (fHandled[i] != 0) {
63 delete fHandled[i];
64 }
65 }
66 }
67
68 UBool
69 UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
70 return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
71 && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
72 }
73
74 int32_t
75 UnhandledEngine::findBreaks( UText *text,
76 int32_t startPos,
77 int32_t endPos,
78 UBool reverse,
79 int32_t breakType,
80 UStack &/*foundBreaks*/ ) const {
81 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
82 UChar32 c = utext_current32(text);
83 if (reverse) {
84 while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
85 c = utext_previous32(text);
86 }
87 }
88 else {
89 while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
90 utext_next32(text); // TODO: recast loop to work with post-increment operations.
91 c = utext_current32(text);
92 }
93 }
94 }
95 return 0;
96 }
97
98 void
99 UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
100 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
101 if (fHandled[breakType] == 0) {
102 fHandled[breakType] = new UnicodeSet();
103 if (fHandled[breakType] == 0) {
104 return;
105 }
106 }
107 if (!fHandled[breakType]->contains(c)) {
108 UErrorCode status = U_ZERO_ERROR;
109 // Apply the entire script of the character.
110 int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
111 fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
112 }
113 }
114 }
115
116 /*
117 ******************************************************************
118 */
119
120 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
121 fEngines = 0;
122 }
123
124 ICULanguageBreakFactory::~ICULanguageBreakFactory() {
125 if (fEngines != 0) {
126 delete fEngines;
127 }
128 }
129
130 U_NAMESPACE_END
131 U_CDECL_BEGIN
132 static void U_CALLCONV _deleteEngine(void *obj) {
133 delete (const LanguageBreakEngine *) obj;
134 }
135 U_CDECL_END
136 U_NAMESPACE_BEGIN
137
138 const LanguageBreakEngine *
139 ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
140 UBool needsInit;
141 int32_t i;
142 const LanguageBreakEngine *lbe = NULL;
143 UErrorCode status = U_ZERO_ERROR;
144
145 umtx_lock(NULL);
146 needsInit = (UBool)(fEngines == NULL);
147 if (!needsInit) {
148 i = fEngines->size();
149 while (--i >= 0) {
150 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
151 if (lbe != NULL && lbe->handles(c, breakType)) {
152 break;
153 }
154 lbe = NULL;
155 }
156 }
157 umtx_unlock(NULL);
158
159 if (lbe != NULL) {
160 return lbe;
161 }
162
163 if (needsInit) {
164 UStack *engines = new UStack(_deleteEngine, NULL, status);
165 if (U_SUCCESS(status) && engines == NULL) {
166 status = U_MEMORY_ALLOCATION_ERROR;
167 }
168 else if (U_FAILURE(status)) {
169 delete engines;
170 engines = NULL;
171 }
172 else {
173 umtx_lock(NULL);
174 if (fEngines == NULL) {
175 fEngines = engines;
176 engines = NULL;
177 }
178 umtx_unlock(NULL);
179 delete engines;
180 }
181 }
182
183 if (fEngines == NULL) {
184 return NULL;
185 }
186
187 // We didn't find an engine the first time through, or there was no
188 // stack. Create an engine.
189 const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
190
191 // Now get the lock, and see if someone else has created it in the
192 // meantime
193 umtx_lock(NULL);
194 i = fEngines->size();
195 while (--i >= 0) {
196 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
197 if (lbe != NULL && lbe->handles(c, breakType)) {
198 break;
199 }
200 lbe = NULL;
201 }
202 if (lbe == NULL && newlbe != NULL) {
203 fEngines->push((void *)newlbe, status);
204 lbe = newlbe;
205 newlbe = NULL;
206 }
207 umtx_unlock(NULL);
208
209 delete newlbe;
210
211 return lbe;
212 }
213
214 const LanguageBreakEngine *
215 ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
216 UErrorCode status = U_ZERO_ERROR;
217 UScriptCode code = uscript_getScript(c, &status);
218 if (U_SUCCESS(status)) {
219 const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType);
220 if (dict != NULL) {
221 const LanguageBreakEngine *engine = NULL;
222 switch(code) {
223 case USCRIPT_THAI:
224 engine = new ThaiBreakEngine(dict, status);
225 break;
226 default:
227 break;
228 }
229 if (engine == NULL) {
230 delete dict;
231 }
232 else if (U_FAILURE(status)) {
233 delete engine;
234 engine = NULL;
235 }
236 return engine;
237 }
238 }
239 return NULL;
240 }
241
242 const CompactTrieDictionary *
243 ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t breakType) {
244 UErrorCode status = U_ZERO_ERROR;
245 // Open root from brkitr tree.
246 char dictnbuff[256];
247 char ext[4]={'\0'};
248
249 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
250 b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
251 b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
252 int32_t dictnlength = 0;
253 const UChar *dictfname = ures_getString(b, &dictnlength, &status);
254 if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
255 dictnlength = 0;
256 status = U_BUFFER_OVERFLOW_ERROR;
257 }
258 if (U_SUCCESS(status) && dictfname) {
259 UChar* extStart=u_strchr(dictfname, 0x002e);
260 int len = 0;
261 if(extStart!=NULL){
262 len = extStart-dictfname;
263 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
264 u_UCharsToChars(dictfname, dictnbuff, len);
265 }
266 dictnbuff[len]=0; // nul terminate
267 }
268 ures_close(b);
269 UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
270 if (U_SUCCESS(status)) {
271 const CompactTrieDictionary *dict = new CompactTrieDictionary(
272 file, status);
273 if (U_SUCCESS(status) && dict == NULL) {
274 status = U_MEMORY_ALLOCATION_ERROR;
275 }
276 if (U_FAILURE(status)) {
277 delete dict;
278 dict = NULL;
279 }
280 return dict;
281 }
282 return NULL;
283 }
284
285 U_NAMESPACE_END
286
287 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */