]>
Commit | Line | Data |
---|---|---|
4388f060 | 1 | /* |
46f4442e | 2 | ************************************************************************************ |
4388f060 A |
3 | * Copyright (C) 2006-2011, International Business Machines Corporation |
4 | * and others. All Rights Reserved. | |
46f4442e | 5 | ************************************************************************************ |
73c04bcf A |
6 | */ |
7 | ||
8 | #include "unicode/utypes.h" | |
9 | ||
10 | #if !UCONFIG_NO_BREAK_ITERATION | |
11 | ||
12 | #include "brkeng.h" | |
13 | #include "dictbe.h" | |
14 | #include "triedict.h" | |
15 | #include "unicode/uchar.h" | |
16 | #include "unicode/uniset.h" | |
17 | #include "unicode/chariter.h" | |
18 | #include "unicode/ures.h" | |
19 | #include "unicode/udata.h" | |
20 | #include "unicode/putil.h" | |
21 | #include "unicode/ustring.h" | |
22 | #include "unicode/uscript.h" | |
23 | #include "uvector.h" | |
46f4442e | 24 | #include "umutex.h" |
73c04bcf A |
25 | #include "uresimp.h" |
26 | #include "ubrkimpl.h" | |
27 | ||
28 | U_NAMESPACE_BEGIN | |
29 | ||
30 | /* | |
31 | ****************************************************************** | |
32 | */ | |
33 | ||
34 | LanguageBreakEngine::LanguageBreakEngine() { | |
35 | } | |
36 | ||
37 | LanguageBreakEngine::~LanguageBreakEngine() { | |
38 | } | |
39 | ||
40 | /* | |
41 | ****************************************************************** | |
42 | */ | |
43 | ||
44 | LanguageBreakFactory::LanguageBreakFactory() { | |
45 | } | |
46 | ||
47 | LanguageBreakFactory::~LanguageBreakFactory() { | |
48 | } | |
49 | ||
50 | /* | |
51 | ****************************************************************** | |
52 | */ | |
53 | ||
54 | UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) { | |
55 | for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { | |
56 | fHandled[i] = 0; | |
57 | } | |
58 | } | |
59 | ||
60 | UnhandledEngine::~UnhandledEngine() { | |
61 | for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) { | |
62 | if (fHandled[i] != 0) { | |
63 | delete fHandled[i]; | |
64 | } | |
65 | } | |
66 | } | |
67 | ||
68 | UBool | |
69 | UnhandledEngine::handles(UChar32 c, int32_t breakType) const { | |
70 | return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])) | |
71 | && fHandled[breakType] != 0 && fHandled[breakType]->contains(c)); | |
72 | } | |
73 | ||
74 | int32_t | |
75 | UnhandledEngine::findBreaks( UText *text, | |
76 | int32_t startPos, | |
77 | int32_t endPos, | |
78 | UBool reverse, | |
79 | int32_t breakType, | |
80 | UStack &/*foundBreaks*/ ) const { | |
81 | if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { | |
82 | UChar32 c = utext_current32(text); | |
83 | if (reverse) { | |
84 | while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) { | |
85 | c = utext_previous32(text); | |
86 | } | |
87 | } | |
88 | else { | |
89 | while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) { | |
90 | utext_next32(text); // TODO: recast loop to work with post-increment operations. | |
91 | c = utext_current32(text); | |
92 | } | |
93 | } | |
94 | } | |
95 | return 0; | |
96 | } | |
97 | ||
98 | void | |
99 | UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) { | |
100 | if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) { | |
101 | if (fHandled[breakType] == 0) { | |
102 | fHandled[breakType] = new UnicodeSet(); | |
103 | if (fHandled[breakType] == 0) { | |
104 | return; | |
105 | } | |
106 | } | |
107 | if (!fHandled[breakType]->contains(c)) { | |
108 | UErrorCode status = U_ZERO_ERROR; | |
109 | // Apply the entire script of the character. | |
110 | int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); | |
111 | fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status); | |
112 | } | |
113 | } | |
114 | } | |
115 | ||
116 | /* | |
117 | ****************************************************************** | |
118 | */ | |
119 | ||
120 | ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { | |
121 | fEngines = 0; | |
122 | } | |
123 | ||
124 | ICULanguageBreakFactory::~ICULanguageBreakFactory() { | |
125 | if (fEngines != 0) { | |
126 | delete fEngines; | |
127 | } | |
128 | } | |
129 | ||
130 | U_NAMESPACE_END | |
131 | U_CDECL_BEGIN | |
132 | static void U_CALLCONV _deleteEngine(void *obj) { | |
4388f060 | 133 | delete (const icu::LanguageBreakEngine *) obj; |
73c04bcf A |
134 | } |
135 | U_CDECL_END | |
136 | U_NAMESPACE_BEGIN | |
137 | ||
138 | const LanguageBreakEngine * | |
139 | ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) { | |
140 | UBool needsInit; | |
141 | int32_t i; | |
142 | const LanguageBreakEngine *lbe = NULL; | |
143 | UErrorCode status = U_ZERO_ERROR; | |
144 | ||
46f4442e A |
145 | // TODO: The global mutex should not be used. |
146 | // The global mutex should only be used for short periods. | |
147 | // A ICULanguageBreakFactory specific mutex should be used. | |
73c04bcf A |
148 | umtx_lock(NULL); |
149 | needsInit = (UBool)(fEngines == NULL); | |
150 | if (!needsInit) { | |
151 | i = fEngines->size(); | |
152 | while (--i >= 0) { | |
153 | lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); | |
154 | if (lbe != NULL && lbe->handles(c, breakType)) { | |
155 | break; | |
156 | } | |
157 | lbe = NULL; | |
158 | } | |
159 | } | |
160 | umtx_unlock(NULL); | |
161 | ||
162 | if (lbe != NULL) { | |
163 | return lbe; | |
164 | } | |
165 | ||
166 | if (needsInit) { | |
167 | UStack *engines = new UStack(_deleteEngine, NULL, status); | |
168 | if (U_SUCCESS(status) && engines == NULL) { | |
169 | status = U_MEMORY_ALLOCATION_ERROR; | |
170 | } | |
171 | else if (U_FAILURE(status)) { | |
172 | delete engines; | |
173 | engines = NULL; | |
174 | } | |
175 | else { | |
176 | umtx_lock(NULL); | |
177 | if (fEngines == NULL) { | |
178 | fEngines = engines; | |
179 | engines = NULL; | |
180 | } | |
181 | umtx_unlock(NULL); | |
182 | delete engines; | |
183 | } | |
184 | } | |
185 | ||
186 | if (fEngines == NULL) { | |
187 | return NULL; | |
188 | } | |
189 | ||
190 | // We didn't find an engine the first time through, or there was no | |
191 | // stack. Create an engine. | |
192 | const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType); | |
193 | ||
194 | // Now get the lock, and see if someone else has created it in the | |
195 | // meantime | |
196 | umtx_lock(NULL); | |
197 | i = fEngines->size(); | |
198 | while (--i >= 0) { | |
199 | lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); | |
200 | if (lbe != NULL && lbe->handles(c, breakType)) { | |
201 | break; | |
202 | } | |
203 | lbe = NULL; | |
204 | } | |
205 | if (lbe == NULL && newlbe != NULL) { | |
206 | fEngines->push((void *)newlbe, status); | |
207 | lbe = newlbe; | |
208 | newlbe = NULL; | |
209 | } | |
210 | umtx_unlock(NULL); | |
211 | ||
212 | delete newlbe; | |
213 | ||
214 | return lbe; | |
215 | } | |
216 | ||
217 | const LanguageBreakEngine * | |
218 | ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) { | |
219 | UErrorCode status = U_ZERO_ERROR; | |
220 | UScriptCode code = uscript_getScript(c, &status); | |
221 | if (U_SUCCESS(status)) { | |
222 | const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType); | |
223 | if (dict != NULL) { | |
224 | const LanguageBreakEngine *engine = NULL; | |
225 | switch(code) { | |
226 | case USCRIPT_THAI: | |
227 | engine = new ThaiBreakEngine(dict, status); | |
228 | break; | |
4388f060 A |
229 | case USCRIPT_KHMER: |
230 | engine = new KhmerBreakEngine(dict, status); | |
231 | break; | |
73c04bcf A |
232 | default: |
233 | break; | |
234 | } | |
235 | if (engine == NULL) { | |
236 | delete dict; | |
237 | } | |
238 | else if (U_FAILURE(status)) { | |
239 | delete engine; | |
240 | engine = NULL; | |
241 | } | |
242 | return engine; | |
243 | } | |
244 | } | |
245 | return NULL; | |
246 | } | |
247 | ||
248 | const CompactTrieDictionary * | |
46f4442e | 249 | ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) { |
73c04bcf A |
250 | UErrorCode status = U_ZERO_ERROR; |
251 | // Open root from brkitr tree. | |
252 | char dictnbuff[256]; | |
253 | char ext[4]={'\0'}; | |
254 | ||
255 | UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); | |
256 | b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); | |
257 | b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status); | |
258 | int32_t dictnlength = 0; | |
259 | const UChar *dictfname = ures_getString(b, &dictnlength, &status); | |
260 | if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) { | |
261 | dictnlength = 0; | |
262 | status = U_BUFFER_OVERFLOW_ERROR; | |
263 | } | |
264 | if (U_SUCCESS(status) && dictfname) { | |
265 | UChar* extStart=u_strchr(dictfname, 0x002e); | |
266 | int len = 0; | |
267 | if(extStart!=NULL){ | |
729e4ab9 | 268 | len = (int)(extStart-dictfname); |
73c04bcf A |
269 | u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff |
270 | u_UCharsToChars(dictfname, dictnbuff, len); | |
271 | } | |
272 | dictnbuff[len]=0; // nul terminate | |
273 | } | |
274 | ures_close(b); | |
275 | UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status); | |
276 | if (U_SUCCESS(status)) { | |
277 | const CompactTrieDictionary *dict = new CompactTrieDictionary( | |
278 | file, status); | |
279 | if (U_SUCCESS(status) && dict == NULL) { | |
280 | status = U_MEMORY_ALLOCATION_ERROR; | |
281 | } | |
282 | if (U_FAILURE(status)) { | |
283 | delete dict; | |
284 | dict = NULL; | |
285 | } | |
286 | return dict; | |
287 | } | |
288 | return NULL; | |
289 | } | |
290 | ||
291 | U_NAMESPACE_END | |
292 | ||
293 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |