]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/brkeng.cpp
ICU-461.17.tar.gz
[apple/icu.git] / icuSources / common / brkeng.cpp
CommitLineData
73c04bcf 1/**
46f4442e 2 ************************************************************************************
729e4ab9 3 * Copyright (C) 2006-2009, International Business Machines Corporation and others. *
46f4442e
A
4 * All Rights Reserved. *
5 ************************************************************************************
73c04bcf
A
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_BREAK_ITERATION
11
12#include "brkeng.h"
13#include "dictbe.h"
14#include "triedict.h"
15#include "unicode/uchar.h"
16#include "unicode/uniset.h"
17#include "unicode/chariter.h"
18#include "unicode/ures.h"
19#include "unicode/udata.h"
20#include "unicode/putil.h"
21#include "unicode/ustring.h"
22#include "unicode/uscript.h"
23#include "uvector.h"
46f4442e 24#include "umutex.h"
73c04bcf
A
25#include "uresimp.h"
26#include "ubrkimpl.h"
27
28U_NAMESPACE_BEGIN
29
30/*
31 ******************************************************************
32 */
33
34LanguageBreakEngine::LanguageBreakEngine() {
35}
36
37LanguageBreakEngine::~LanguageBreakEngine() {
38}
39
40/*
41 ******************************************************************
42 */
43
44LanguageBreakFactory::LanguageBreakFactory() {
45}
46
47LanguageBreakFactory::~LanguageBreakFactory() {
48}
49
50/*
51 ******************************************************************
52 */
53
54UnhandledEngine::UnhandledEngine(UErrorCode &/*status*/) {
55 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
56 fHandled[i] = 0;
57 }
58}
59
60UnhandledEngine::~UnhandledEngine() {
61 for (int32_t i = 0; i < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0])); ++i) {
62 if (fHandled[i] != 0) {
63 delete fHandled[i];
64 }
65 }
66}
67
68UBool
69UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
70 return (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))
71 && fHandled[breakType] != 0 && fHandled[breakType]->contains(c));
72}
73
74int32_t
75UnhandledEngine::findBreaks( UText *text,
76 int32_t startPos,
77 int32_t endPos,
78 UBool reverse,
79 int32_t breakType,
80 UStack &/*foundBreaks*/ ) const {
81 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
82 UChar32 c = utext_current32(text);
83 if (reverse) {
84 while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
85 c = utext_previous32(text);
86 }
87 }
88 else {
89 while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
90 utext_next32(text); // TODO: recast loop to work with post-increment operations.
91 c = utext_current32(text);
92 }
93 }
94 }
95 return 0;
96}
97
98void
99UnhandledEngine::handleCharacter(UChar32 c, int32_t breakType) {
100 if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
101 if (fHandled[breakType] == 0) {
102 fHandled[breakType] = new UnicodeSet();
103 if (fHandled[breakType] == 0) {
104 return;
105 }
106 }
107 if (!fHandled[breakType]->contains(c)) {
108 UErrorCode status = U_ZERO_ERROR;
109 // Apply the entire script of the character.
110 int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
111 fHandled[breakType]->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
112 }
113 }
114}
115
116/*
117 ******************************************************************
118 */
119
120ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
121 fEngines = 0;
122}
123
124ICULanguageBreakFactory::~ICULanguageBreakFactory() {
125 if (fEngines != 0) {
126 delete fEngines;
127 }
128}
129
130U_NAMESPACE_END
131U_CDECL_BEGIN
132static void U_CALLCONV _deleteEngine(void *obj) {
46f4442e 133 delete (const U_NAMESPACE_QUALIFIER LanguageBreakEngine *) obj;
73c04bcf
A
134}
135U_CDECL_END
136U_NAMESPACE_BEGIN
137
138const LanguageBreakEngine *
139ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
140 UBool needsInit;
141 int32_t i;
142 const LanguageBreakEngine *lbe = NULL;
143 UErrorCode status = U_ZERO_ERROR;
144
46f4442e
A
145 // TODO: The global mutex should not be used.
146 // The global mutex should only be used for short periods.
147 // A ICULanguageBreakFactory specific mutex should be used.
73c04bcf
A
148 umtx_lock(NULL);
149 needsInit = (UBool)(fEngines == NULL);
150 if (!needsInit) {
151 i = fEngines->size();
152 while (--i >= 0) {
153 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
154 if (lbe != NULL && lbe->handles(c, breakType)) {
155 break;
156 }
157 lbe = NULL;
158 }
159 }
160 umtx_unlock(NULL);
161
162 if (lbe != NULL) {
163 return lbe;
164 }
165
166 if (needsInit) {
167 UStack *engines = new UStack(_deleteEngine, NULL, status);
168 if (U_SUCCESS(status) && engines == NULL) {
169 status = U_MEMORY_ALLOCATION_ERROR;
170 }
171 else if (U_FAILURE(status)) {
172 delete engines;
173 engines = NULL;
174 }
175 else {
176 umtx_lock(NULL);
177 if (fEngines == NULL) {
178 fEngines = engines;
179 engines = NULL;
180 }
181 umtx_unlock(NULL);
182 delete engines;
183 }
184 }
185
186 if (fEngines == NULL) {
187 return NULL;
188 }
189
190 // We didn't find an engine the first time through, or there was no
191 // stack. Create an engine.
192 const LanguageBreakEngine *newlbe = loadEngineFor(c, breakType);
193
194 // Now get the lock, and see if someone else has created it in the
195 // meantime
196 umtx_lock(NULL);
197 i = fEngines->size();
198 while (--i >= 0) {
199 lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
200 if (lbe != NULL && lbe->handles(c, breakType)) {
201 break;
202 }
203 lbe = NULL;
204 }
205 if (lbe == NULL && newlbe != NULL) {
206 fEngines->push((void *)newlbe, status);
207 lbe = newlbe;
208 newlbe = NULL;
209 }
210 umtx_unlock(NULL);
211
212 delete newlbe;
213
214 return lbe;
215}
216
217const LanguageBreakEngine *
218ICULanguageBreakFactory::loadEngineFor(UChar32 c, int32_t breakType) {
219 UErrorCode status = U_ZERO_ERROR;
220 UScriptCode code = uscript_getScript(c, &status);
221 if (U_SUCCESS(status)) {
222 const CompactTrieDictionary *dict = loadDictionaryFor(code, breakType);
223 if (dict != NULL) {
224 const LanguageBreakEngine *engine = NULL;
225 switch(code) {
226 case USCRIPT_THAI:
227 engine = new ThaiBreakEngine(dict, status);
228 break;
229 default:
230 break;
231 }
232 if (engine == NULL) {
233 delete dict;
234 }
235 else if (U_FAILURE(status)) {
236 delete engine;
237 engine = NULL;
238 }
239 return engine;
240 }
241 }
242 return NULL;
243}
244
245const CompactTrieDictionary *
46f4442e 246ICULanguageBreakFactory::loadDictionaryFor(UScriptCode script, int32_t /*breakType*/) {
73c04bcf
A
247 UErrorCode status = U_ZERO_ERROR;
248 // Open root from brkitr tree.
249 char dictnbuff[256];
250 char ext[4]={'\0'};
251
252 UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
253 b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
254 b = ures_getByKeyWithFallback(b, uscript_getShortName(script), b, &status);
255 int32_t dictnlength = 0;
256 const UChar *dictfname = ures_getString(b, &dictnlength, &status);
257 if (U_SUCCESS(status) && (size_t)dictnlength >= sizeof(dictnbuff)) {
258 dictnlength = 0;
259 status = U_BUFFER_OVERFLOW_ERROR;
260 }
261 if (U_SUCCESS(status) && dictfname) {
262 UChar* extStart=u_strchr(dictfname, 0x002e);
263 int len = 0;
264 if(extStart!=NULL){
729e4ab9 265 len = (int)(extStart-dictfname);
73c04bcf
A
266 u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
267 u_UCharsToChars(dictfname, dictnbuff, len);
268 }
269 dictnbuff[len]=0; // nul terminate
270 }
271 ures_close(b);
272 UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext, dictnbuff, &status);
273 if (U_SUCCESS(status)) {
274 const CompactTrieDictionary *dict = new CompactTrieDictionary(
275 file, status);
276 if (U_SUCCESS(status) && dict == NULL) {
277 status = U_MEMORY_ALLOCATION_ERROR;
278 }
279 if (U_FAILURE(status)) {
280 delete dict;
281 dict = NULL;
282 }
283 return dict;
284 }
285 return NULL;
286}
287
288U_NAMESPACE_END
289
290#endif /* #if !UCONFIG_NO_BREAK_ITERATION */