]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2009-2010, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: normalizer2.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2009nov22 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_NORMALIZATION | |
20 | ||
21 | #include "unicode/localpointer.h" | |
22 | #include "unicode/normalizer2.h" | |
23 | #include "unicode/unistr.h" | |
24 | #include "unicode/unorm.h" | |
25 | #include "cpputils.h" | |
26 | #include "cstring.h" | |
27 | #include "mutex.h" | |
28 | #include "normalizer2impl.h" | |
29 | #include "ucln_cmn.h" | |
30 | #include "uhash.h" | |
31 | ||
32 | U_NAMESPACE_BEGIN | |
33 | ||
34 | // Public API dispatch via Normalizer2 subclasses -------------------------- *** | |
35 | ||
36 | // Normalizer2 implementation for the old UNORM_NONE. | |
37 | class NoopNormalizer2 : public Normalizer2 { | |
38 | virtual UnicodeString & | |
39 | normalize(const UnicodeString &src, | |
40 | UnicodeString &dest, | |
41 | UErrorCode &errorCode) const { | |
42 | if(U_SUCCESS(errorCode)) { | |
43 | if(&dest!=&src) { | |
44 | dest=src; | |
45 | } else { | |
46 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
47 | } | |
48 | } | |
49 | return dest; | |
50 | } | |
51 | virtual UnicodeString & | |
52 | normalizeSecondAndAppend(UnicodeString &first, | |
53 | const UnicodeString &second, | |
54 | UErrorCode &errorCode) const { | |
55 | if(U_SUCCESS(errorCode)) { | |
56 | if(&first!=&second) { | |
57 | first.append(second); | |
58 | } else { | |
59 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
60 | } | |
61 | } | |
62 | return first; | |
63 | } | |
64 | virtual UnicodeString & | |
65 | append(UnicodeString &first, | |
66 | const UnicodeString &second, | |
67 | UErrorCode &errorCode) const { | |
68 | if(U_SUCCESS(errorCode)) { | |
69 | if(&first!=&second) { | |
70 | first.append(second); | |
71 | } else { | |
72 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
73 | } | |
74 | } | |
75 | return first; | |
76 | } | |
77 | virtual UBool | |
78 | getDecomposition(UChar32, UnicodeString &) const { | |
79 | return FALSE; | |
80 | } | |
81 | virtual UBool | |
82 | isNormalized(const UnicodeString &, UErrorCode &) const { | |
83 | return TRUE; | |
84 | } | |
85 | virtual UNormalizationCheckResult | |
86 | quickCheck(const UnicodeString &, UErrorCode &) const { | |
87 | return UNORM_YES; | |
88 | } | |
89 | virtual int32_t | |
90 | spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { | |
91 | return s.length(); | |
92 | } | |
93 | virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } | |
94 | virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } | |
95 | virtual UBool isInert(UChar32) const { return TRUE; } | |
96 | }; | |
97 | ||
98 | // Intermediate class: | |
99 | // Has Normalizer2Impl and does boilerplate argument checking and setup. | |
100 | class Normalizer2WithImpl : public Normalizer2 { | |
101 | public: | |
102 | Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} | |
103 | ||
104 | // normalize | |
105 | virtual UnicodeString & | |
106 | normalize(const UnicodeString &src, | |
107 | UnicodeString &dest, | |
108 | UErrorCode &errorCode) const { | |
109 | if(U_FAILURE(errorCode)) { | |
110 | dest.setToBogus(); | |
111 | return dest; | |
112 | } | |
113 | const UChar *sArray=src.getBuffer(); | |
114 | if(&dest==&src || sArray==NULL) { | |
115 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
116 | dest.setToBogus(); | |
117 | return dest; | |
118 | } | |
119 | dest.remove(); | |
120 | ReorderingBuffer buffer(impl, dest); | |
121 | if(buffer.init(src.length(), errorCode)) { | |
122 | normalize(sArray, sArray+src.length(), buffer, errorCode); | |
123 | } | |
124 | return dest; | |
125 | } | |
126 | virtual void | |
127 | normalize(const UChar *src, const UChar *limit, | |
128 | ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; | |
129 | ||
130 | // normalize and append | |
131 | virtual UnicodeString & | |
132 | normalizeSecondAndAppend(UnicodeString &first, | |
133 | const UnicodeString &second, | |
134 | UErrorCode &errorCode) const { | |
135 | return normalizeSecondAndAppend(first, second, TRUE, errorCode); | |
136 | } | |
137 | virtual UnicodeString & | |
138 | append(UnicodeString &first, | |
139 | const UnicodeString &second, | |
140 | UErrorCode &errorCode) const { | |
141 | return normalizeSecondAndAppend(first, second, FALSE, errorCode); | |
142 | } | |
143 | UnicodeString & | |
144 | normalizeSecondAndAppend(UnicodeString &first, | |
145 | const UnicodeString &second, | |
146 | UBool doNormalize, | |
147 | UErrorCode &errorCode) const { | |
148 | uprv_checkCanGetBuffer(first, errorCode); | |
149 | if(U_FAILURE(errorCode)) { | |
150 | return first; | |
151 | } | |
152 | const UChar *secondArray=second.getBuffer(); | |
153 | if(&first==&second || secondArray==NULL) { | |
154 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
155 | return first; | |
156 | } | |
157 | ReorderingBuffer buffer(impl, first); | |
158 | if(buffer.init(first.length()+second.length(), errorCode)) { | |
159 | normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, | |
160 | buffer, errorCode); | |
161 | } | |
162 | return first; | |
163 | } | |
164 | virtual void | |
165 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | |
166 | ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; | |
167 | virtual UBool | |
168 | getDecomposition(UChar32 c, UnicodeString &decomposition) const { | |
169 | UChar buffer[4]; | |
170 | int32_t length; | |
171 | const UChar *d=impl.getDecomposition(c, buffer, length); | |
172 | if(d==NULL) { | |
173 | return FALSE; | |
174 | } | |
175 | if(d==buffer) { | |
176 | decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) | |
177 | } else { | |
178 | decomposition.setTo(FALSE, d, length); // read-only alias | |
179 | } | |
180 | return TRUE; | |
181 | } | |
182 | ||
183 | // quick checks | |
184 | virtual UBool | |
185 | isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { | |
186 | if(U_FAILURE(errorCode)) { | |
187 | return FALSE; | |
188 | } | |
189 | const UChar *sArray=s.getBuffer(); | |
190 | if(sArray==NULL) { | |
191 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
192 | return FALSE; | |
193 | } | |
194 | const UChar *sLimit=sArray+s.length(); | |
195 | return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); | |
196 | } | |
197 | virtual UNormalizationCheckResult | |
198 | quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { | |
199 | return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; | |
200 | } | |
201 | virtual int32_t | |
202 | spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { | |
203 | if(U_FAILURE(errorCode)) { | |
204 | return 0; | |
205 | } | |
206 | const UChar *sArray=s.getBuffer(); | |
207 | if(sArray==NULL) { | |
208 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
209 | return 0; | |
210 | } | |
211 | return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); | |
212 | } | |
213 | virtual const UChar * | |
214 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; | |
215 | ||
216 | virtual UNormalizationCheckResult getQuickCheck(UChar32) const { | |
217 | return UNORM_YES; | |
218 | } | |
219 | ||
220 | const Normalizer2Impl &impl; | |
221 | }; | |
222 | ||
223 | class DecomposeNormalizer2 : public Normalizer2WithImpl { | |
224 | public: | |
225 | DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} | |
226 | ||
227 | private: | |
228 | virtual void | |
229 | normalize(const UChar *src, const UChar *limit, | |
230 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { | |
231 | impl.decompose(src, limit, &buffer, errorCode); | |
232 | } | |
233 | using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. | |
234 | virtual void | |
235 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | |
236 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { | |
237 | impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode); | |
238 | } | |
239 | virtual const UChar * | |
240 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { | |
241 | return impl.decompose(src, limit, NULL, errorCode); | |
242 | } | |
243 | using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. | |
244 | virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { | |
245 | return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; | |
246 | } | |
247 | virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } | |
248 | virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } | |
249 | virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } | |
250 | }; | |
251 | ||
252 | class ComposeNormalizer2 : public Normalizer2WithImpl { | |
253 | public: | |
254 | ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : | |
255 | Normalizer2WithImpl(ni), onlyContiguous(fcc) {} | |
256 | ||
257 | private: | |
258 | virtual void | |
259 | normalize(const UChar *src, const UChar *limit, | |
260 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { | |
261 | impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); | |
262 | } | |
263 | using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. | |
264 | virtual void | |
265 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | |
266 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { | |
267 | impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode); | |
268 | } | |
269 | ||
270 | virtual UBool | |
271 | isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { | |
272 | if(U_FAILURE(errorCode)) { | |
273 | return FALSE; | |
274 | } | |
275 | const UChar *sArray=s.getBuffer(); | |
276 | if(sArray==NULL) { | |
277 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
278 | return FALSE; | |
279 | } | |
280 | UnicodeString temp; | |
281 | ReorderingBuffer buffer(impl, temp); | |
282 | if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization | |
283 | return FALSE; | |
284 | } | |
285 | return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); | |
286 | } | |
287 | virtual UNormalizationCheckResult | |
288 | quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { | |
289 | if(U_FAILURE(errorCode)) { | |
290 | return UNORM_MAYBE; | |
291 | } | |
292 | const UChar *sArray=s.getBuffer(); | |
293 | if(sArray==NULL) { | |
294 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
295 | return UNORM_MAYBE; | |
296 | } | |
297 | UNormalizationCheckResult qcResult=UNORM_YES; | |
298 | impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); | |
299 | return qcResult; | |
300 | } | |
301 | virtual const UChar * | |
302 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { | |
303 | return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); | |
304 | } | |
305 | using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. | |
306 | virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { | |
307 | return impl.getCompQuickCheck(impl.getNorm16(c)); | |
308 | } | |
309 | virtual UBool hasBoundaryBefore(UChar32 c) const { | |
310 | return impl.hasCompBoundaryBefore(c); | |
311 | } | |
312 | virtual UBool hasBoundaryAfter(UChar32 c) const { | |
313 | return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); | |
314 | } | |
315 | virtual UBool isInert(UChar32 c) const { | |
316 | return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); | |
317 | } | |
318 | ||
319 | const UBool onlyContiguous; | |
320 | }; | |
321 | ||
322 | class FCDNormalizer2 : public Normalizer2WithImpl { | |
323 | public: | |
324 | FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} | |
325 | ||
326 | private: | |
327 | virtual void | |
328 | normalize(const UChar *src, const UChar *limit, | |
329 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { | |
330 | impl.makeFCD(src, limit, &buffer, errorCode); | |
331 | } | |
332 | using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. | |
333 | virtual void | |
334 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, | |
335 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { | |
336 | impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode); | |
337 | } | |
338 | virtual const UChar * | |
339 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { | |
340 | return impl.makeFCD(src, limit, NULL, errorCode); | |
341 | } | |
342 | using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. | |
343 | virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } | |
344 | virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } | |
345 | virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } | |
346 | }; | |
347 | ||
348 | // instance cache ---------------------------------------------------------- *** | |
349 | ||
350 | struct Norm2AllModes : public UMemory { | |
351 | static Norm2AllModes *createInstance(const char *packageName, | |
352 | const char *name, | |
353 | UErrorCode &errorCode); | |
354 | Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} | |
355 | ||
356 | Normalizer2Impl impl; | |
357 | ComposeNormalizer2 comp; | |
358 | DecomposeNormalizer2 decomp; | |
359 | FCDNormalizer2 fcd; | |
360 | ComposeNormalizer2 fcc; | |
361 | }; | |
362 | ||
363 | Norm2AllModes * | |
364 | Norm2AllModes::createInstance(const char *packageName, | |
365 | const char *name, | |
366 | UErrorCode &errorCode) { | |
367 | if(U_FAILURE(errorCode)) { | |
368 | return NULL; | |
369 | } | |
370 | LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); | |
371 | if(allModes.isNull()) { | |
372 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
373 | return NULL; | |
374 | } | |
375 | allModes->impl.load(packageName, name, errorCode); | |
376 | return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; | |
377 | } | |
378 | ||
379 | U_CDECL_BEGIN | |
380 | static UBool U_CALLCONV uprv_normalizer2_cleanup(); | |
381 | U_CDECL_END | |
382 | ||
383 | class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { | |
384 | public: | |
385 | Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : | |
386 | TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} | |
387 | Norm2AllModes *getInstance(UErrorCode &errorCode) { | |
388 | return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); | |
389 | } | |
390 | private: | |
391 | static void *createInstance(const void *context, UErrorCode &errorCode) { | |
392 | ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); | |
393 | return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); | |
394 | } | |
395 | ||
396 | const char *name; | |
397 | }; | |
398 | ||
399 | STATIC_TRI_STATE_SINGLETON(nfcSingleton); | |
400 | STATIC_TRI_STATE_SINGLETON(nfkcSingleton); | |
401 | STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); | |
402 | ||
403 | class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { | |
404 | public: | |
405 | Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} | |
406 | Normalizer2 *getInstance(UErrorCode &errorCode) { | |
407 | return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); | |
408 | } | |
409 | private: | |
410 | static void *createInstance(const void *, UErrorCode &errorCode) { | |
411 | Normalizer2 *noop=new NoopNormalizer2; | |
412 | if(noop==NULL) { | |
413 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
414 | } | |
415 | ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); | |
416 | return noop; | |
417 | } | |
418 | }; | |
419 | ||
420 | STATIC_SIMPLE_SINGLETON(noopSingleton); | |
421 | ||
422 | static UHashtable *cache=NULL; | |
423 | ||
424 | U_CDECL_BEGIN | |
425 | ||
426 | static void U_CALLCONV deleteNorm2AllModes(void *allModes) { | |
427 | delete (Norm2AllModes *)allModes; | |
428 | } | |
429 | ||
430 | static UBool U_CALLCONV uprv_normalizer2_cleanup() { | |
431 | Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); | |
432 | Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); | |
433 | Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); | |
434 | Norm2Singleton(noopSingleton).deleteInstance(); | |
435 | uhash_close(cache); | |
436 | cache=NULL; | |
437 | return TRUE; | |
438 | } | |
439 | ||
440 | U_CDECL_END | |
441 | ||
442 | const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { | |
443 | Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
444 | return allModes!=NULL ? &allModes->comp : NULL; | |
445 | } | |
446 | ||
447 | const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { | |
448 | Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
449 | return allModes!=NULL ? &allModes->decomp : NULL; | |
450 | } | |
451 | ||
452 | const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { | |
453 | Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
454 | if(allModes!=NULL) { | |
455 | allModes->impl.getFCDTrie(errorCode); | |
456 | return &allModes->fcd; | |
457 | } else { | |
458 | return NULL; | |
459 | } | |
460 | } | |
461 | ||
462 | const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { | |
463 | Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
464 | return allModes!=NULL ? &allModes->fcc : NULL; | |
465 | } | |
466 | ||
467 | const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { | |
468 | Norm2AllModes *allModes= | |
469 | Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); | |
470 | return allModes!=NULL ? &allModes->comp : NULL; | |
471 | } | |
472 | ||
473 | const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { | |
474 | Norm2AllModes *allModes= | |
475 | Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); | |
476 | return allModes!=NULL ? &allModes->decomp : NULL; | |
477 | } | |
478 | ||
479 | const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { | |
480 | Norm2AllModes *allModes= | |
481 | Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); | |
482 | return allModes!=NULL ? &allModes->comp : NULL; | |
483 | } | |
484 | ||
485 | const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { | |
486 | return Norm2Singleton(noopSingleton).getInstance(errorCode); | |
487 | } | |
488 | ||
489 | const Normalizer2 * | |
490 | Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { | |
491 | if(U_FAILURE(errorCode)) { | |
492 | return NULL; | |
493 | } | |
494 | switch(mode) { | |
495 | case UNORM_NFD: | |
496 | return getNFDInstance(errorCode); | |
497 | case UNORM_NFKD: | |
498 | return getNFKDInstance(errorCode); | |
499 | case UNORM_NFC: | |
500 | return getNFCInstance(errorCode); | |
501 | case UNORM_NFKC: | |
502 | return getNFKCInstance(errorCode); | |
503 | case UNORM_FCD: | |
504 | return getFCDInstance(errorCode); | |
505 | default: // UNORM_NONE | |
506 | return getNoopInstance(errorCode); | |
507 | } | |
508 | } | |
509 | ||
510 | const Normalizer2Impl * | |
511 | Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { | |
512 | Norm2AllModes *allModes= | |
513 | Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
514 | return allModes!=NULL ? &allModes->impl : NULL; | |
515 | } | |
516 | ||
517 | const Normalizer2Impl * | |
518 | Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { | |
519 | Norm2AllModes *allModes= | |
520 | Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); | |
521 | return allModes!=NULL ? &allModes->impl : NULL; | |
522 | } | |
523 | ||
524 | const Normalizer2Impl * | |
525 | Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { | |
526 | Norm2AllModes *allModes= | |
527 | Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); | |
528 | return allModes!=NULL ? &allModes->impl : NULL; | |
529 | } | |
530 | ||
531 | const Normalizer2Impl * | |
532 | Normalizer2Factory::getImpl(const Normalizer2 *norm2) { | |
533 | return &((Normalizer2WithImpl *)norm2)->impl; | |
534 | } | |
535 | ||
536 | const UTrie2 * | |
537 | Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { | |
538 | Norm2AllModes *allModes= | |
539 | Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
540 | if(allModes!=NULL) { | |
541 | return allModes->impl.getFCDTrie(errorCode); | |
542 | } else { | |
543 | return NULL; | |
544 | } | |
545 | } | |
546 | ||
547 | const Normalizer2 * | |
548 | Normalizer2::getInstance(const char *packageName, | |
549 | const char *name, | |
550 | UNormalization2Mode mode, | |
551 | UErrorCode &errorCode) { | |
552 | if(U_FAILURE(errorCode)) { | |
553 | return NULL; | |
554 | } | |
555 | if(name==NULL || *name==0) { | |
556 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
557 | } | |
558 | Norm2AllModes *allModes=NULL; | |
559 | if(packageName==NULL) { | |
560 | if(0==uprv_strcmp(name, "nfc")) { | |
561 | allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); | |
562 | } else if(0==uprv_strcmp(name, "nfkc")) { | |
563 | allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); | |
564 | } else if(0==uprv_strcmp(name, "nfkc_cf")) { | |
565 | allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); | |
566 | } | |
567 | } | |
568 | if(allModes==NULL && U_SUCCESS(errorCode)) { | |
569 | { | |
570 | Mutex lock; | |
571 | if(cache!=NULL) { | |
572 | allModes=(Norm2AllModes *)uhash_get(cache, name); | |
573 | } | |
574 | } | |
575 | if(allModes==NULL) { | |
576 | LocalPointer<Norm2AllModes> localAllModes( | |
577 | Norm2AllModes::createInstance(packageName, name, errorCode)); | |
578 | if(U_SUCCESS(errorCode)) { | |
579 | Mutex lock; | |
580 | if(cache==NULL) { | |
581 | cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); | |
582 | if(U_FAILURE(errorCode)) { | |
583 | return NULL; | |
584 | } | |
585 | uhash_setKeyDeleter(cache, uprv_free); | |
586 | uhash_setValueDeleter(cache, deleteNorm2AllModes); | |
587 | } | |
588 | void *temp=uhash_get(cache, name); | |
589 | if(temp==NULL) { | |
590 | int32_t keyLength=uprv_strlen(name)+1; | |
591 | char *nameCopy=(char *)uprv_malloc(keyLength); | |
592 | if(nameCopy==NULL) { | |
593 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
594 | return NULL; | |
595 | } | |
596 | uprv_memcpy(nameCopy, name, keyLength); | |
597 | uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); | |
598 | } else { | |
599 | // race condition | |
600 | allModes=(Norm2AllModes *)temp; | |
601 | } | |
602 | } | |
603 | } | |
604 | } | |
605 | if(allModes!=NULL && U_SUCCESS(errorCode)) { | |
606 | switch(mode) { | |
607 | case UNORM2_COMPOSE: | |
608 | return &allModes->comp; | |
609 | case UNORM2_DECOMPOSE: | |
610 | return &allModes->decomp; | |
611 | case UNORM2_FCD: | |
612 | allModes->impl.getFCDTrie(errorCode); | |
613 | return &allModes->fcd; | |
614 | case UNORM2_COMPOSE_CONTIGUOUS: | |
615 | return &allModes->fcc; | |
616 | default: | |
617 | break; // do nothing | |
618 | } | |
619 | } | |
620 | return NULL; | |
621 | } | |
622 | ||
623 | UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) | |
624 | ||
625 | U_NAMESPACE_END | |
626 | ||
627 | // C API ------------------------------------------------------------------- *** | |
628 | ||
629 | U_NAMESPACE_USE | |
630 | ||
631 | U_DRAFT const UNormalizer2 * U_EXPORT2 | |
632 | unorm2_getInstance(const char *packageName, | |
633 | const char *name, | |
634 | UNormalization2Mode mode, | |
635 | UErrorCode *pErrorCode) { | |
636 | return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); | |
637 | } | |
638 | ||
639 | U_DRAFT void U_EXPORT2 | |
640 | unorm2_close(UNormalizer2 *norm2) { | |
641 | delete (Normalizer2 *)norm2; | |
642 | } | |
643 | ||
644 | U_DRAFT int32_t U_EXPORT2 | |
645 | unorm2_normalize(const UNormalizer2 *norm2, | |
646 | const UChar *src, int32_t length, | |
647 | UChar *dest, int32_t capacity, | |
648 | UErrorCode *pErrorCode) { | |
649 | if(U_FAILURE(*pErrorCode)) { | |
650 | return 0; | |
651 | } | |
652 | if( (src==NULL ? length!=0 : length<-1) || | |
653 | (dest==NULL ? capacity!=0 : capacity<0) || | |
654 | (src==dest && src!=NULL) | |
655 | ) { | |
656 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
657 | return 0; | |
658 | } | |
659 | UnicodeString destString(dest, 0, capacity); | |
660 | // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. | |
661 | if(length!=0) { | |
662 | const Normalizer2 *n2=(const Normalizer2 *)norm2; | |
663 | const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); | |
664 | if(n2wi!=NULL) { | |
665 | // Avoid duplicate argument checking and support NUL-terminated src. | |
666 | ReorderingBuffer buffer(n2wi->impl, destString); | |
667 | if(buffer.init(length, *pErrorCode)) { | |
668 | n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); | |
669 | } | |
670 | } else { | |
671 | UnicodeString srcString(length<0, src, length); | |
672 | n2->normalize(srcString, destString, *pErrorCode); | |
673 | } | |
674 | } | |
675 | return destString.extract(dest, capacity, *pErrorCode); | |
676 | } | |
677 | ||
678 | static int32_t | |
679 | normalizeSecondAndAppend(const UNormalizer2 *norm2, | |
680 | UChar *first, int32_t firstLength, int32_t firstCapacity, | |
681 | const UChar *second, int32_t secondLength, | |
682 | UBool doNormalize, | |
683 | UErrorCode *pErrorCode) { | |
684 | if(U_FAILURE(*pErrorCode)) { | |
685 | return 0; | |
686 | } | |
687 | if( (second==NULL ? secondLength!=0 : secondLength<-1) || | |
688 | (first==NULL ? (firstCapacity!=0 || firstLength!=0) : | |
689 | (firstCapacity<0 || firstLength<-1)) || | |
690 | (first==second && first!=NULL) | |
691 | ) { | |
692 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
693 | return 0; | |
694 | } | |
695 | UnicodeString firstString(first, firstLength, firstCapacity); | |
696 | // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. | |
697 | if(secondLength!=0) { | |
698 | const Normalizer2 *n2=(const Normalizer2 *)norm2; | |
699 | const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); | |
700 | if(n2wi!=NULL) { | |
701 | // Avoid duplicate argument checking and support NUL-terminated src. | |
702 | ReorderingBuffer buffer(n2wi->impl, firstString); | |
703 | if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 | |
704 | n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, | |
705 | doNormalize, buffer, *pErrorCode); | |
706 | } | |
707 | } else { | |
708 | UnicodeString secondString(secondLength<0, second, secondLength); | |
709 | if(doNormalize) { | |
710 | n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); | |
711 | } else { | |
712 | n2->append(firstString, secondString, *pErrorCode); | |
713 | } | |
714 | } | |
715 | } | |
716 | return firstString.extract(first, firstCapacity, *pErrorCode); | |
717 | } | |
718 | ||
719 | U_DRAFT int32_t U_EXPORT2 | |
720 | unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, | |
721 | UChar *first, int32_t firstLength, int32_t firstCapacity, | |
722 | const UChar *second, int32_t secondLength, | |
723 | UErrorCode *pErrorCode) { | |
724 | return normalizeSecondAndAppend(norm2, | |
725 | first, firstLength, firstCapacity, | |
726 | second, secondLength, | |
727 | TRUE, pErrorCode); | |
728 | } | |
729 | ||
730 | U_DRAFT int32_t U_EXPORT2 | |
731 | unorm2_append(const UNormalizer2 *norm2, | |
732 | UChar *first, int32_t firstLength, int32_t firstCapacity, | |
733 | const UChar *second, int32_t secondLength, | |
734 | UErrorCode *pErrorCode) { | |
735 | return normalizeSecondAndAppend(norm2, | |
736 | first, firstLength, firstCapacity, | |
737 | second, secondLength, | |
738 | FALSE, pErrorCode); | |
739 | } | |
740 | ||
741 | U_DRAFT int32_t U_EXPORT2 | |
742 | unorm2_getDecomposition(const UNormalizer2 *norm2, | |
743 | UChar32 c, UChar *decomposition, int32_t capacity, | |
744 | UErrorCode *pErrorCode) { | |
745 | if(U_FAILURE(*pErrorCode)) { | |
746 | return 0; | |
747 | } | |
748 | if(decomposition==NULL ? capacity!=0 : capacity<0) { | |
749 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
750 | return 0; | |
751 | } | |
752 | UnicodeString destString(decomposition, 0, capacity); | |
753 | if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { | |
754 | return destString.extract(decomposition, capacity, *pErrorCode); | |
755 | } else { | |
756 | return -1; | |
757 | } | |
758 | } | |
759 | ||
760 | U_DRAFT UBool U_EXPORT2 | |
761 | unorm2_isNormalized(const UNormalizer2 *norm2, | |
762 | const UChar *s, int32_t length, | |
763 | UErrorCode *pErrorCode) { | |
764 | if(U_FAILURE(*pErrorCode)) { | |
765 | return 0; | |
766 | } | |
767 | if((s==NULL && length!=0) || length<-1) { | |
768 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
769 | return 0; | |
770 | } | |
771 | UnicodeString sString(length<0, s, length); | |
772 | return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); | |
773 | } | |
774 | ||
775 | U_DRAFT UNormalizationCheckResult U_EXPORT2 | |
776 | unorm2_quickCheck(const UNormalizer2 *norm2, | |
777 | const UChar *s, int32_t length, | |
778 | UErrorCode *pErrorCode) { | |
779 | if(U_FAILURE(*pErrorCode)) { | |
780 | return UNORM_NO; | |
781 | } | |
782 | if((s==NULL && length!=0) || length<-1) { | |
783 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
784 | return UNORM_NO; | |
785 | } | |
786 | UnicodeString sString(length<0, s, length); | |
787 | return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); | |
788 | } | |
789 | ||
790 | U_DRAFT int32_t U_EXPORT2 | |
791 | unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, | |
792 | const UChar *s, int32_t length, | |
793 | UErrorCode *pErrorCode) { | |
794 | if(U_FAILURE(*pErrorCode)) { | |
795 | return 0; | |
796 | } | |
797 | if((s==NULL && length!=0) || length<-1) { | |
798 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; | |
799 | return 0; | |
800 | } | |
801 | UnicodeString sString(length<0, s, length); | |
802 | return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); | |
803 | } | |
804 | ||
805 | U_DRAFT UBool U_EXPORT2 | |
806 | unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { | |
807 | return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); | |
808 | } | |
809 | ||
810 | U_DRAFT UBool U_EXPORT2 | |
811 | unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { | |
812 | return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); | |
813 | } | |
814 | ||
815 | U_DRAFT UBool U_EXPORT2 | |
816 | unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { | |
817 | return ((const Normalizer2 *)norm2)->isInert(c); | |
818 | } | |
819 | ||
820 | // Some properties APIs ---------------------------------------------------- *** | |
821 | ||
822 | U_CFUNC UNormalizationCheckResult U_EXPORT2 | |
823 | unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { | |
824 | if(mode<=UNORM_NONE || UNORM_FCD<=mode) { | |
825 | return UNORM_YES; | |
826 | } | |
827 | UErrorCode errorCode=U_ZERO_ERROR; | |
828 | const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); | |
829 | if(U_SUCCESS(errorCode)) { | |
830 | return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); | |
831 | } else { | |
832 | return UNORM_MAYBE; | |
833 | } | |
834 | } | |
835 | ||
836 | U_CAPI const uint16_t * U_EXPORT2 | |
837 | unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { | |
838 | const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); | |
839 | if(U_SUCCESS(*pErrorCode)) { | |
840 | fcdHighStart=trie->highStart; | |
841 | return trie->index; | |
842 | } else { | |
843 | return NULL; | |
844 | } | |
845 | } | |
846 | ||
847 | #endif // !UCONFIG_NO_NORMALIZATION |