]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/normalizer2.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / common / normalizer2.cpp
CommitLineData
729e4ab9
A
1/*
2*******************************************************************************
3*
4* Copyright (C) 2009-2010, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: normalizer2.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009nov22
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/localpointer.h"
22#include "unicode/normalizer2.h"
23#include "unicode/unistr.h"
24#include "unicode/unorm.h"
25#include "cpputils.h"
26#include "cstring.h"
27#include "mutex.h"
28#include "normalizer2impl.h"
29#include "ucln_cmn.h"
30#include "uhash.h"
31
32U_NAMESPACE_BEGIN
33
34// Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36// Normalizer2 implementation for the old UNORM_NONE.
37class NoopNormalizer2 : public Normalizer2 {
38 virtual UnicodeString &
39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const {
42 if(U_SUCCESS(errorCode)) {
43 if(&dest!=&src) {
44 dest=src;
45 } else {
46 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47 }
48 }
49 return dest;
50 }
51 virtual UnicodeString &
52 normalizeSecondAndAppend(UnicodeString &first,
53 const UnicodeString &second,
54 UErrorCode &errorCode) const {
55 if(U_SUCCESS(errorCode)) {
56 if(&first!=&second) {
57 first.append(second);
58 } else {
59 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
60 }
61 }
62 return first;
63 }
64 virtual UnicodeString &
65 append(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const {
68 if(U_SUCCESS(errorCode)) {
69 if(&first!=&second) {
70 first.append(second);
71 } else {
72 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73 }
74 }
75 return first;
76 }
77 virtual UBool
78 getDecomposition(UChar32, UnicodeString &) const {
79 return FALSE;
80 }
81 virtual UBool
82 isNormalized(const UnicodeString &, UErrorCode &) const {
83 return TRUE;
84 }
85 virtual UNormalizationCheckResult
86 quickCheck(const UnicodeString &, UErrorCode &) const {
87 return UNORM_YES;
88 }
89 virtual int32_t
90 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
91 return s.length();
92 }
93 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
94 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
95 virtual UBool isInert(UChar32) const { return TRUE; }
96};
97
98// Intermediate class:
99// Has Normalizer2Impl and does boilerplate argument checking and setup.
100class Normalizer2WithImpl : public Normalizer2 {
101public:
102 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
103
104 // normalize
105 virtual UnicodeString &
106 normalize(const UnicodeString &src,
107 UnicodeString &dest,
108 UErrorCode &errorCode) const {
109 if(U_FAILURE(errorCode)) {
110 dest.setToBogus();
111 return dest;
112 }
113 const UChar *sArray=src.getBuffer();
114 if(&dest==&src || sArray==NULL) {
115 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
116 dest.setToBogus();
117 return dest;
118 }
119 dest.remove();
120 ReorderingBuffer buffer(impl, dest);
121 if(buffer.init(src.length(), errorCode)) {
122 normalize(sArray, sArray+src.length(), buffer, errorCode);
123 }
124 return dest;
125 }
126 virtual void
127 normalize(const UChar *src, const UChar *limit,
128 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
129
130 // normalize and append
131 virtual UnicodeString &
132 normalizeSecondAndAppend(UnicodeString &first,
133 const UnicodeString &second,
134 UErrorCode &errorCode) const {
135 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
136 }
137 virtual UnicodeString &
138 append(UnicodeString &first,
139 const UnicodeString &second,
140 UErrorCode &errorCode) const {
141 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
142 }
143 UnicodeString &
144 normalizeSecondAndAppend(UnicodeString &first,
145 const UnicodeString &second,
146 UBool doNormalize,
147 UErrorCode &errorCode) const {
148 uprv_checkCanGetBuffer(first, errorCode);
149 if(U_FAILURE(errorCode)) {
150 return first;
151 }
152 const UChar *secondArray=second.getBuffer();
153 if(&first==&second || secondArray==NULL) {
154 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
155 return first;
156 }
157 ReorderingBuffer buffer(impl, first);
158 if(buffer.init(first.length()+second.length(), errorCode)) {
159 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
160 buffer, errorCode);
161 }
162 return first;
163 }
164 virtual void
165 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
166 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
167 virtual UBool
168 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
169 UChar buffer[4];
170 int32_t length;
171 const UChar *d=impl.getDecomposition(c, buffer, length);
172 if(d==NULL) {
173 return FALSE;
174 }
175 if(d==buffer) {
176 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
177 } else {
178 decomposition.setTo(FALSE, d, length); // read-only alias
179 }
180 return TRUE;
181 }
182
183 // quick checks
184 virtual UBool
185 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
186 if(U_FAILURE(errorCode)) {
187 return FALSE;
188 }
189 const UChar *sArray=s.getBuffer();
190 if(sArray==NULL) {
191 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
192 return FALSE;
193 }
194 const UChar *sLimit=sArray+s.length();
195 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
196 }
197 virtual UNormalizationCheckResult
198 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
199 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
200 }
201 virtual int32_t
202 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
203 if(U_FAILURE(errorCode)) {
204 return 0;
205 }
206 const UChar *sArray=s.getBuffer();
207 if(sArray==NULL) {
208 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
209 return 0;
210 }
211 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
212 }
213 virtual const UChar *
214 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
215
216 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
217 return UNORM_YES;
218 }
219
220 const Normalizer2Impl &impl;
221};
222
223class DecomposeNormalizer2 : public Normalizer2WithImpl {
224public:
225 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
226
227private:
228 virtual void
229 normalize(const UChar *src, const UChar *limit,
230 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
231 impl.decompose(src, limit, &buffer, errorCode);
232 }
233 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
234 virtual void
235 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
236 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
237 impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode);
238 }
239 virtual const UChar *
240 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
241 return impl.decompose(src, limit, NULL, errorCode);
242 }
243 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
244 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
245 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
246 }
247 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
248 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
249 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
250};
251
252class ComposeNormalizer2 : public Normalizer2WithImpl {
253public:
254 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
255 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
256
257private:
258 virtual void
259 normalize(const UChar *src, const UChar *limit,
260 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
261 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
262 }
263 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
264 virtual void
265 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
266 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
267 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode);
268 }
269
270 virtual UBool
271 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
272 if(U_FAILURE(errorCode)) {
273 return FALSE;
274 }
275 const UChar *sArray=s.getBuffer();
276 if(sArray==NULL) {
277 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
278 return FALSE;
279 }
280 UnicodeString temp;
281 ReorderingBuffer buffer(impl, temp);
282 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
283 return FALSE;
284 }
285 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
286 }
287 virtual UNormalizationCheckResult
288 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
289 if(U_FAILURE(errorCode)) {
290 return UNORM_MAYBE;
291 }
292 const UChar *sArray=s.getBuffer();
293 if(sArray==NULL) {
294 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
295 return UNORM_MAYBE;
296 }
297 UNormalizationCheckResult qcResult=UNORM_YES;
298 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
299 return qcResult;
300 }
301 virtual const UChar *
302 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
303 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
304 }
305 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
306 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
307 return impl.getCompQuickCheck(impl.getNorm16(c));
308 }
309 virtual UBool hasBoundaryBefore(UChar32 c) const {
310 return impl.hasCompBoundaryBefore(c);
311 }
312 virtual UBool hasBoundaryAfter(UChar32 c) const {
313 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
314 }
315 virtual UBool isInert(UChar32 c) const {
316 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
317 }
318
319 const UBool onlyContiguous;
320};
321
322class FCDNormalizer2 : public Normalizer2WithImpl {
323public:
324 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
325
326private:
327 virtual void
328 normalize(const UChar *src, const UChar *limit,
329 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
330 impl.makeFCD(src, limit, &buffer, errorCode);
331 }
332 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
333 virtual void
334 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
335 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
336 impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode);
337 }
338 virtual const UChar *
339 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
340 return impl.makeFCD(src, limit, NULL, errorCode);
341 }
342 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
343 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
344 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
345 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
346};
347
348// instance cache ---------------------------------------------------------- ***
349
350struct Norm2AllModes : public UMemory {
351 static Norm2AllModes *createInstance(const char *packageName,
352 const char *name,
353 UErrorCode &errorCode);
354 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
355
356 Normalizer2Impl impl;
357 ComposeNormalizer2 comp;
358 DecomposeNormalizer2 decomp;
359 FCDNormalizer2 fcd;
360 ComposeNormalizer2 fcc;
361};
362
363Norm2AllModes *
364Norm2AllModes::createInstance(const char *packageName,
365 const char *name,
366 UErrorCode &errorCode) {
367 if(U_FAILURE(errorCode)) {
368 return NULL;
369 }
370 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
371 if(allModes.isNull()) {
372 errorCode=U_MEMORY_ALLOCATION_ERROR;
373 return NULL;
374 }
375 allModes->impl.load(packageName, name, errorCode);
376 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
377}
378
// Forward declaration (inside the U_CDECL_BEGIN/U_CDECL_END bracket) of the
// cleanup function: the singleton factories below pass it to
// ucln_common_registerCleanup() before its definition appears in this file.
379U_CDECL_BEGIN
380static UBool U_CALLCONV uprv_normalizer2_cleanup();
381U_CDECL_END
382
383class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
384public:
385 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
386 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
387 Norm2AllModes *getInstance(UErrorCode &errorCode) {
388 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
389 }
390private:
391 static void *createInstance(const void *context, UErrorCode &errorCode) {
392 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
393 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
394 }
395
396 const char *name;
397};
398
// Singleton state for the three built-in data sets. Elsewhere in this file
// each one is wrapped in a Norm2AllModesSingleton together with its data
// name: "nfc", "nfkc" and "nfkc_cf" respectively.
399STATIC_TRI_STATE_SINGLETON(nfcSingleton);
400STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
401STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
402
403class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
404public:
405 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
406 Normalizer2 *getInstance(UErrorCode &errorCode) {
407 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
408 }
409private:
410 static void *createInstance(const void *, UErrorCode &errorCode) {
411 Normalizer2 *noop=new NoopNormalizer2;
412 if(noop==NULL) {
413 errorCode=U_MEMORY_ALLOCATION_ERROR;
414 }
415 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
416 return noop;
417 }
418};
419
// Singleton state for the no-op normalizer handed out by
// Normalizer2Factory::getNoopInstance().
420STATIC_SIMPLE_SINGLETON(noopSingleton);
421
// Table from data name to Norm2AllModes for data requested through
// Normalizer2::getInstance() that is not served by the singletons above.
// It is created on first use there (reads and writes happen under a Mutex)
// and closed by uprv_normalizer2_cleanup().
422static UHashtable *cache=NULL;
423
424U_CDECL_BEGIN
425
426static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
427 delete (Norm2AllModes *)allModes;
428}
429
430static UBool U_CALLCONV uprv_normalizer2_cleanup() {
431 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
432 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
433 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
434 Norm2Singleton(noopSingleton).deleteInstance();
435 uhash_close(cache);
436 cache=NULL;
437 return TRUE;
438}
439
440U_CDECL_END
441
442const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
443 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
444 return allModes!=NULL ? &allModes->comp : NULL;
445}
446
447const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
448 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
449 return allModes!=NULL ? &allModes->decomp : NULL;
450}
451
452const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
453 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
454 if(allModes!=NULL) {
455 allModes->impl.getFCDTrie(errorCode);
456 return &allModes->fcd;
457 } else {
458 return NULL;
459 }
460}
461
462const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
463 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
464 return allModes!=NULL ? &allModes->fcc : NULL;
465}
466
467const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
468 Norm2AllModes *allModes=
469 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
470 return allModes!=NULL ? &allModes->comp : NULL;
471}
472
473const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
474 Norm2AllModes *allModes=
475 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
476 return allModes!=NULL ? &allModes->decomp : NULL;
477}
478
479const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
480 Norm2AllModes *allModes=
481 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
482 return allModes!=NULL ? &allModes->comp : NULL;
483}
484
485const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
486 return Norm2Singleton(noopSingleton).getInstance(errorCode);
487}
488
489const Normalizer2 *
490Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
491 if(U_FAILURE(errorCode)) {
492 return NULL;
493 }
494 switch(mode) {
495 case UNORM_NFD:
496 return getNFDInstance(errorCode);
497 case UNORM_NFKD:
498 return getNFKDInstance(errorCode);
499 case UNORM_NFC:
500 return getNFCInstance(errorCode);
501 case UNORM_NFKC:
502 return getNFKCInstance(errorCode);
503 case UNORM_FCD:
504 return getFCDInstance(errorCode);
505 default: // UNORM_NONE
506 return getNoopInstance(errorCode);
507 }
508}
509
510const Normalizer2Impl *
511Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
512 Norm2AllModes *allModes=
513 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
514 return allModes!=NULL ? &allModes->impl : NULL;
515}
516
517const Normalizer2Impl *
518Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
519 Norm2AllModes *allModes=
520 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
521 return allModes!=NULL ? &allModes->impl : NULL;
522}
523
524const Normalizer2Impl *
525Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
526 Norm2AllModes *allModes=
527 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
528 return allModes!=NULL ? &allModes->impl : NULL;
529}
530
531const Normalizer2Impl *
532Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
533 return &((Normalizer2WithImpl *)norm2)->impl;
534}
535
536const UTrie2 *
537Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
538 Norm2AllModes *allModes=
539 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
540 if(allModes!=NULL) {
541 return allModes->impl.getFCDTrie(errorCode);
542 } else {
543 return NULL;
544 }
545}
546
547const Normalizer2 *
548Normalizer2::getInstance(const char *packageName,
549 const char *name,
550 UNormalization2Mode mode,
551 UErrorCode &errorCode) {
552 if(U_FAILURE(errorCode)) {
553 return NULL;
554 }
555 if(name==NULL || *name==0) {
556 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
557 }
558 Norm2AllModes *allModes=NULL;
559 if(packageName==NULL) {
560 if(0==uprv_strcmp(name, "nfc")) {
561 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
562 } else if(0==uprv_strcmp(name, "nfkc")) {
563 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
564 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
565 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
566 }
567 }
568 if(allModes==NULL && U_SUCCESS(errorCode)) {
569 {
570 Mutex lock;
571 if(cache!=NULL) {
572 allModes=(Norm2AllModes *)uhash_get(cache, name);
573 }
574 }
575 if(allModes==NULL) {
576 LocalPointer<Norm2AllModes> localAllModes(
577 Norm2AllModes::createInstance(packageName, name, errorCode));
578 if(U_SUCCESS(errorCode)) {
579 Mutex lock;
580 if(cache==NULL) {
581 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
582 if(U_FAILURE(errorCode)) {
583 return NULL;
584 }
585 uhash_setKeyDeleter(cache, uprv_free);
586 uhash_setValueDeleter(cache, deleteNorm2AllModes);
587 }
588 void *temp=uhash_get(cache, name);
589 if(temp==NULL) {
590 int32_t keyLength=uprv_strlen(name)+1;
591 char *nameCopy=(char *)uprv_malloc(keyLength);
592 if(nameCopy==NULL) {
593 errorCode=U_MEMORY_ALLOCATION_ERROR;
594 return NULL;
595 }
596 uprv_memcpy(nameCopy, name, keyLength);
597 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
598 } else {
599 // race condition
600 allModes=(Norm2AllModes *)temp;
601 }
602 }
603 }
604 }
605 if(allModes!=NULL && U_SUCCESS(errorCode)) {
606 switch(mode) {
607 case UNORM2_COMPOSE:
608 return &allModes->comp;
609 case UNORM2_DECOMPOSE:
610 return &allModes->decomp;
611 case UNORM2_FCD:
612 allModes->impl.getFCDTrie(errorCode);
613 return &allModes->fcd;
614 case UNORM2_COMPOSE_CONTIGUOUS:
615 return &allModes->fcc;
616 default:
617 break; // do nothing
618 }
619 }
620 return NULL;
621}
622
// NOTE(review): presumably expands to the UObject class-ID boilerplate for
// Normalizer2 - confirm against the macro's definition in the ICU headers.
623UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
624
625U_NAMESPACE_END
626
627// C API ------------------------------------------------------------------- ***
628
629U_NAMESPACE_USE
630
631U_DRAFT const UNormalizer2 * U_EXPORT2
632unorm2_getInstance(const char *packageName,
633 const char *name,
634 UNormalization2Mode mode,
635 UErrorCode *pErrorCode) {
636 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
637}
638
639U_DRAFT void U_EXPORT2
640unorm2_close(UNormalizer2 *norm2) {
641 delete (Normalizer2 *)norm2;
642}
643
644U_DRAFT int32_t U_EXPORT2
645unorm2_normalize(const UNormalizer2 *norm2,
646 const UChar *src, int32_t length,
647 UChar *dest, int32_t capacity,
648 UErrorCode *pErrorCode) {
649 if(U_FAILURE(*pErrorCode)) {
650 return 0;
651 }
652 if( (src==NULL ? length!=0 : length<-1) ||
653 (dest==NULL ? capacity!=0 : capacity<0) ||
654 (src==dest && src!=NULL)
655 ) {
656 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
657 return 0;
658 }
659 UnicodeString destString(dest, 0, capacity);
660 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
661 if(length!=0) {
662 const Normalizer2 *n2=(const Normalizer2 *)norm2;
663 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
664 if(n2wi!=NULL) {
665 // Avoid duplicate argument checking and support NUL-terminated src.
666 ReorderingBuffer buffer(n2wi->impl, destString);
667 if(buffer.init(length, *pErrorCode)) {
668 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
669 }
670 } else {
671 UnicodeString srcString(length<0, src, length);
672 n2->normalize(srcString, destString, *pErrorCode);
673 }
674 }
675 return destString.extract(dest, capacity, *pErrorCode);
676}
677
678static int32_t
679normalizeSecondAndAppend(const UNormalizer2 *norm2,
680 UChar *first, int32_t firstLength, int32_t firstCapacity,
681 const UChar *second, int32_t secondLength,
682 UBool doNormalize,
683 UErrorCode *pErrorCode) {
684 if(U_FAILURE(*pErrorCode)) {
685 return 0;
686 }
687 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
688 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
689 (firstCapacity<0 || firstLength<-1)) ||
690 (first==second && first!=NULL)
691 ) {
692 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
693 return 0;
694 }
695 UnicodeString firstString(first, firstLength, firstCapacity);
696 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
697 if(secondLength!=0) {
698 const Normalizer2 *n2=(const Normalizer2 *)norm2;
699 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
700 if(n2wi!=NULL) {
701 // Avoid duplicate argument checking and support NUL-terminated src.
702 ReorderingBuffer buffer(n2wi->impl, firstString);
703 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
704 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
705 doNormalize, buffer, *pErrorCode);
706 }
707 } else {
708 UnicodeString secondString(secondLength<0, second, secondLength);
709 if(doNormalize) {
710 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
711 } else {
712 n2->append(firstString, secondString, *pErrorCode);
713 }
714 }
715 }
716 return firstString.extract(first, firstCapacity, *pErrorCode);
717}
718
719U_DRAFT int32_t U_EXPORT2
720unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
721 UChar *first, int32_t firstLength, int32_t firstCapacity,
722 const UChar *second, int32_t secondLength,
723 UErrorCode *pErrorCode) {
724 return normalizeSecondAndAppend(norm2,
725 first, firstLength, firstCapacity,
726 second, secondLength,
727 TRUE, pErrorCode);
728}
729
730U_DRAFT int32_t U_EXPORT2
731unorm2_append(const UNormalizer2 *norm2,
732 UChar *first, int32_t firstLength, int32_t firstCapacity,
733 const UChar *second, int32_t secondLength,
734 UErrorCode *pErrorCode) {
735 return normalizeSecondAndAppend(norm2,
736 first, firstLength, firstCapacity,
737 second, secondLength,
738 FALSE, pErrorCode);
739}
740
741U_DRAFT int32_t U_EXPORT2
742unorm2_getDecomposition(const UNormalizer2 *norm2,
743 UChar32 c, UChar *decomposition, int32_t capacity,
744 UErrorCode *pErrorCode) {
745 if(U_FAILURE(*pErrorCode)) {
746 return 0;
747 }
748 if(decomposition==NULL ? capacity!=0 : capacity<0) {
749 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
750 return 0;
751 }
752 UnicodeString destString(decomposition, 0, capacity);
753 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
754 return destString.extract(decomposition, capacity, *pErrorCode);
755 } else {
756 return -1;
757 }
758}
759
760U_DRAFT UBool U_EXPORT2
761unorm2_isNormalized(const UNormalizer2 *norm2,
762 const UChar *s, int32_t length,
763 UErrorCode *pErrorCode) {
764 if(U_FAILURE(*pErrorCode)) {
765 return 0;
766 }
767 if((s==NULL && length!=0) || length<-1) {
768 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
769 return 0;
770 }
771 UnicodeString sString(length<0, s, length);
772 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
773}
774
775U_DRAFT UNormalizationCheckResult U_EXPORT2
776unorm2_quickCheck(const UNormalizer2 *norm2,
777 const UChar *s, int32_t length,
778 UErrorCode *pErrorCode) {
779 if(U_FAILURE(*pErrorCode)) {
780 return UNORM_NO;
781 }
782 if((s==NULL && length!=0) || length<-1) {
783 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
784 return UNORM_NO;
785 }
786 UnicodeString sString(length<0, s, length);
787 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
788}
789
790U_DRAFT int32_t U_EXPORT2
791unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
792 const UChar *s, int32_t length,
793 UErrorCode *pErrorCode) {
794 if(U_FAILURE(*pErrorCode)) {
795 return 0;
796 }
797 if((s==NULL && length!=0) || length<-1) {
798 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
799 return 0;
800 }
801 UnicodeString sString(length<0, s, length);
802 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
803}
804
805U_DRAFT UBool U_EXPORT2
806unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
807 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
808}
809
810U_DRAFT UBool U_EXPORT2
811unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
812 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
813}
814
815U_DRAFT UBool U_EXPORT2
816unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
817 return ((const Normalizer2 *)norm2)->isInert(c);
818}
819
820// Some properties APIs ---------------------------------------------------- ***
821
822U_CFUNC UNormalizationCheckResult U_EXPORT2
823unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
824 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
825 return UNORM_YES;
826 }
827 UErrorCode errorCode=U_ZERO_ERROR;
828 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
829 if(U_SUCCESS(errorCode)) {
830 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
831 } else {
832 return UNORM_MAYBE;
833 }
834}
835
836U_CAPI const uint16_t * U_EXPORT2
837unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
838 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode);
839 if(U_SUCCESS(*pErrorCode)) {
840 fcdHighStart=trie->highStart;
841 return trie->index;
842 } else {
843 return NULL;
844 }
845}
846
847#endif // !UCONFIG_NO_NORMALIZATION