]>
Commit | Line | Data |
---|---|---|
73c04bcf | 1 | /** |
46f4442e | 2 | ************************************************************************************ |
729e4ab9 | 3 | * Copyright (C) 2006-2009, International Business Machines Corporation and others. * |
46f4442e A |
4 | * All Rights Reserved. * |
5 | ************************************************************************************ | |
73c04bcf A |
6 | */ |
7 | ||
8 | #include "unicode/utypes.h" | |
9 | ||
10 | #if !UCONFIG_NO_BREAK_ITERATION | |
11 | ||
12 | #include "brkeng.h" | |
13 | #include "dictbe.h" | |
14 | #include "triedict.h" | |
15 | #include "unicode/uchar.h" | |
16 | #include "unicode/uniset.h" | |
17 | #include "unicode/chariter.h" | |
18 | #include "unicode/ures.h" | |
19 | #include "unicode/udata.h" | |
20 | #include "unicode/putil.h" | |
21 | #include "unicode/ustring.h" | |
22 | #include "unicode/uscript.h" | |
23 | #include "uvector.h" | |
46f4442e | 24 | #include "umutex.h" |
73c04bcf A |
25 | #include "uresimp.h" |
26 | #include "ubrkimpl.h" | |
27 | ||
28 | U_NAMESPACE_BEGIN | |
29 | ||
30 | /* | |
31 | ****************************************************************** | |
32 | */ | |
33 | ||
34 | LanguageBreakEngine::LanguageBreakEngine() { | |
35 | } | |
36 | ||
37 | LanguageBreakEngine::~LanguageBreakEngine() { | |
38 | } | |
39 | ||
40 | /* | |
41 | ****************************************************************** | |
42 | */ | |
43 | ||
44 | LanguageBreakFactory::LanguageBreakFactory() { | |
45 | } | |
46 | ||
47 | LanguageBreakFactory::~LanguageBreakFactory() { | |
48 | } | |
49 | ||
50 | /* | |
51 | ****************************************************************** | |
52 | */ | |
53 | ||
54 | UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { | |
55 | for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { | |
56 | fHandled[i] = 0; | |
57 | } | |
58 | } | |
59 | ||
60 | UnhandledEngine::~UnhandledEngine() { | |
61 | for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { | |
62 | if (fHandled[i] != 0) { | |
63 | delete fHandled[i]; | |
64 | } | |
65 | } | |
66 | } | |
67 | ||
68 | UBool | |
69 | UnhandledEngine::handles(UChar32 c, int32_t breakType) const { | |
70 | return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) | |
71 | && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); | |
72 | } | |
73 | ||
74 | int32_t | |
75 | UnhandledEngine::findBreaks( UText *text, | |
76 | int32_t startPos, | |
77 | int32_t endPos, | |
78 | UBool reverse, | |
79 | int32_t breakType, | |
80 | UStack &/*foundBreaks*/ ) const { | |
81 | if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { | |
82 | UChar32 c = utext_current32(text); | |
83 | if (reverse) { | |
84 | while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { | |
85 | c = utext_previous32(text); | |
86 | } | |
87 | } | |
88 | else { | |
89 | while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { | |
90 | utext_next32(text); // TODO: recast loop to work with post-increment operations. | |
91 | c = utext_current32(text); | |
92 | } | |
93 | } | |
94 | } | |
95 | return 0; | |
96 | } | |
97 | ||
98 | void | |
99 | UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { | |
100 | if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { | |
101 | if (fHandled[breakType] == 0) { | |
102 | fHandled[breakType] = new UnicodeSet(); | |
103 | if (fHandled[breakType] == 0) { | |
104 | return; | |
105 | } | |
106 | } | |
107 | if (!fHandled[breakType]->contains(c)) { | |
108 | UErrorCode status = U_ZERO_ERROR; | |
109 | // Apply the entire script of the character. | |
110 | int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); | |
111 | fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); | |
112 | } | |
113 | } | |
114 | } | |
115 | ||
116 | /* | |
117 | ****************************************************************** | |
118 | */ | |
119 | ||
120 | ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { | |
121 | fEngines = 0; | |
122 | } | |
123 | ||
124 | ICULanguageBreakFactory::~ICULanguageBreakFactory() { | |
125 | if (fEngines != 0) { | |
126 | delete fEngines; | |
127 | } | |
128 | } | |
129 | ||
130 | U_NAMESPACE_END | |
131 | U_CDECL_BEGIN | |
132 | static void U_CALLCONV _deleteEngine(void *obj) { | |
46f4442e | 133 | delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj; |
73c04bcf A |
134 | } |
135 | U_CDECL_END | |
136 | U_NAMESPACE_BEGIN | |
137 | ||
138 | const LanguageBreakEngine * | |
139 | ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { | |
140 | UBool needsInit; | |
141 | int32_t i; | |
142 | const LanguageBreakEngine *lbe = NULL; | |
143 | UErrorCode status = U_ZERO_ERROR; | |
144 | ||
46f4442e A |
145 | // TODO: The global mutex should not be used. |
146 | // The global mutex should only be used for short periods. | |
147 | // A ICULanguageBreakFactory specific mutex should be used. | |
73c04bcf A |
148 | umtx_lock(NULL); |
149 | needsInit = (UBool)(fEngines == NULL); | |
150 | if (!needsInit) { | |
151 | i = fEngines->size(); | |
152 | while (--i >= 0) { | |
153 | lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); | |
154 | if (lbe != NULL && lbe->handles(c, breakType)) { | |
155 | break; | |
156 | } | |
157 | lbe = NULL; | |
158 | } | |
159 | } | |
160 | umtx_unlock(NULL); | |
161 | ||
162 | if (lbe != NULL) { | |
163 | return lbe; | |
164 | } | |
165 | ||
166 | if (needsInit) { | |
167 | UStack *engines = new UStack(_deleteEngine, NULL, status); | |
168 | if (U_SUCCESS(status) && engines == NULL) { | |
169 | status = U_MEMORY_ALLOCATION_ERROR; | |
170 | } | |
171 | else if (U_FAILURE(status)) { | |
172 | delete engines; | |
173 | engines = NULL; | |
174 | } | |
175 | else { | |
176 | umtx_lock(NULL); | |
177 | if (fEngines == NULL) { | |
178 | fEngines = engines; | |
179 | engines = NULL; | |
180 | } | |
181 | umtx_unlock(NULL); | |
182 | delete engines; | |
183 | } | |
184 | } | |
185 | ||
186 | if (fEngines == NULL) { | |
187 | return NULL; | |
188 | } | |
189 | ||
190 | // We didn't find an engine the first time through, or there was no | |
191 | // stack. Create an engine. | |
192 | const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); | |
193 | ||
194 | // Now get the lock, and see if someone else has created it in the | |
195 | // meantime | |
196 | umtx_lock(NULL); | |
197 | i = fEngines->size(); | |
198 | while (--i >= 0) { | |
199 | lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); | |
200 | if (lbe != NULL && lbe->handles(c, breakType)) { | |
201 | break; | |
202 | } | |
203 | lbe = NULL; | |
204 | } | |
205 | if (lbe == NULL && newlbe != NULL) { | |
206 | fEngines->push((void *)newlbe, status); | |
207 | lbe = newlbe; | |
208 | newlbe = NULL; | |
209 | } | |
210 | umtx_unlock(NULL); | |
211 | ||
212 | delete newlbe; | |
213 | ||
214 | return lbe; | |
215 | } | |
216 | ||
217 | const LanguageBreakEngine * | |
218 | ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { | |
219 | UErrorCode status = U_ZERO_ERROR; | |
220 | UScriptCode code = uscript_getScript(c, &status); | |
221 | if (U_SUCCESS(status)) { | |
222 | const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType); | |
223 | if (dict != NULL) { | |
224 | const LanguageBreakEngine *engine = NULL; | |
225 | switch(code) { | |
226 | case USCRIPT_THAI: | |
227 | engine = new ThaiBreakEngine(dict, status); | |
228 | break; | |
229 | default: | |
230 | break; | |
231 | } | |
232 | if (engine == NULL) { | |
233 | delete dict; | |
234 | } | |
235 | else if (U_FAILURE(status)) { | |
236 | delete engine; | |
237 | engine = NULL; | |
238 | } | |
239 | return engine; | |
240 | } | |
241 | } | |
242 | return NULL; | |
243 | } | |
244 | ||
245 | const CompactTrieDictionary * | |
46f4442e | 246 | ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) { |
73c04bcf A |
247 | UErrorCode status = U_ZERO_ERROR; |
248 | // Open root from brkitr tree. | |
249 | char dictnbuff[256]; | |
250 | char ext[4]={'\0'}; | |
251 | ||
252 | UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); | |
253 | b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); | |
254 | b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status); | |
255 | int32_t dictnlength = 0; | |
256 | const UChar *dictfname = ures_getString(b, &dictnlength, &status); | |
257 | if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { | |
258 | dictnlength = 0; | |
259 | status = U_BUFFER_OVERFLOW_ERROR; | |
260 | } | |
261 | if (U_SUCCESS(status) && dictfname) { | |
262 | UChar* extStart=u_strchr(dictfname, 0x002e); | |
263 | int len = 0; | |
264 | if(extStart!=NULL){ | |
729e4ab9 | 265 | len = (int)(extStart-dictfname); |
73c04bcf A |
266 | u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff |
267 | u_UCharsToChars(dictfname, dictnbuff, len); | |
268 | } | |
269 | dictnbuff[len]=0; // nul terminate | |
270 | } | |
271 | ures_close(b); | |
272 | UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); | |
273 | if (U_SUCCESS(status)) { | |
274 | const CompactTrieDictionary *dict = new CompactTrieDictionary( | |
275 | file, status); | |
276 | if (U_SUCCESS(status) && dict == NULL) { | |
277 | status = U_MEMORY_ALLOCATION_ERROR; | |
278 | } | |
279 | if (U_FAILURE(status)) { | |
280 | delete dict; | |
281 | dict = NULL; | |
282 | } | |
283 | return dict; | |
284 | } | |
285 | return NULL; | |
286 | } | |
287 | ||
288 | U_NAMESPACE_END | |
289 | ||
290 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |