// Source: git.saurik.com Git - apple/icu.git/blob - icuSources/common/normalizer2.cpp
// Release archive: ICU-531.31.tar.gz
// Repository path: [apple/icu.git] / icuSources / common / normalizer2.cpp
/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "uassert.h"
30 #include "ucln_cmn.h"
31 #include "uhash.h"
32
33 U_NAMESPACE_BEGIN
34
35 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
36
37 Normalizer2::~Normalizer2() {}
38
39 UBool
40 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
41 return FALSE;
42 }
43
44 UChar32
45 Normalizer2::composePair(UChar32, UChar32) const {
46 return U_SENTINEL;
47 }
48
49 uint8_t
50 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
51 return 0;
52 }
53
54 // Normalizer2 implementation for the old UNORM_NONE.
55 class NoopNormalizer2 : public Normalizer2 {
56 virtual ~NoopNormalizer2();
57
58 virtual UnicodeString &
59 normalize(const UnicodeString &src,
60 UnicodeString &dest,
61 UErrorCode &errorCode) const {
62 if(U_SUCCESS(errorCode)) {
63 if(&dest!=&src) {
64 dest=src;
65 } else {
66 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
67 }
68 }
69 return dest;
70 }
71 virtual UnicodeString &
72 normalizeSecondAndAppend(UnicodeString &first,
73 const UnicodeString &second,
74 UErrorCode &errorCode) const {
75 if(U_SUCCESS(errorCode)) {
76 if(&first!=&second) {
77 first.append(second);
78 } else {
79 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
80 }
81 }
82 return first;
83 }
84 virtual UnicodeString &
85 append(UnicodeString &first,
86 const UnicodeString &second,
87 UErrorCode &errorCode) const {
88 if(U_SUCCESS(errorCode)) {
89 if(&first!=&second) {
90 first.append(second);
91 } else {
92 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
93 }
94 }
95 return first;
96 }
97 virtual UBool
98 getDecomposition(UChar32, UnicodeString &) const {
99 return FALSE;
100 }
101 // No need to override the default getRawDecomposition().
102 virtual UBool
103 isNormalized(const UnicodeString &, UErrorCode &) const {
104 return TRUE;
105 }
106 virtual UNormalizationCheckResult
107 quickCheck(const UnicodeString &, UErrorCode &) const {
108 return UNORM_YES;
109 }
110 virtual int32_t
111 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
112 return s.length();
113 }
114 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
115 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
116 virtual UBool isInert(UChar32) const { return TRUE; }
117 };
118
119 NoopNormalizer2::~NoopNormalizer2() {}
120
121 // Intermediate class:
122 // Has Normalizer2Impl and does boilerplate argument checking and setup.
123 class Normalizer2WithImpl : public Normalizer2 {
124 public:
125 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
126 virtual ~Normalizer2WithImpl();
127
128 // normalize
129 virtual UnicodeString &
130 normalize(const UnicodeString &src,
131 UnicodeString &dest,
132 UErrorCode &errorCode) const {
133 if(U_FAILURE(errorCode)) {
134 dest.setToBogus();
135 return dest;
136 }
137 const UChar *sArray=src.getBuffer();
138 if(&dest==&src || sArray==NULL) {
139 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
140 dest.setToBogus();
141 return dest;
142 }
143 dest.remove();
144 ReorderingBuffer buffer(impl, dest);
145 if(buffer.init(src.length(), errorCode)) {
146 normalize(sArray, sArray+src.length(), buffer, errorCode);
147 }
148 return dest;
149 }
150 virtual void
151 normalize(const UChar *src, const UChar *limit,
152 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
153
154 // normalize and append
155 virtual UnicodeString &
156 normalizeSecondAndAppend(UnicodeString &first,
157 const UnicodeString &second,
158 UErrorCode &errorCode) const {
159 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
160 }
161 virtual UnicodeString &
162 append(UnicodeString &first,
163 const UnicodeString &second,
164 UErrorCode &errorCode) const {
165 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
166 }
167 UnicodeString &
168 normalizeSecondAndAppend(UnicodeString &first,
169 const UnicodeString &second,
170 UBool doNormalize,
171 UErrorCode &errorCode) const {
172 uprv_checkCanGetBuffer(first, errorCode);
173 if(U_FAILURE(errorCode)) {
174 return first;
175 }
176 const UChar *secondArray=second.getBuffer();
177 if(&first==&second || secondArray==NULL) {
178 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
179 return first;
180 }
181 int32_t firstLength=first.length();
182 UnicodeString safeMiddle;
183 {
184 ReorderingBuffer buffer(impl, first);
185 if(buffer.init(firstLength+second.length(), errorCode)) {
186 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
187 safeMiddle, buffer, errorCode);
188 }
189 } // The ReorderingBuffer destructor finalizes the first string.
190 if(U_FAILURE(errorCode)) {
191 // Restore the modified suffix of the first string.
192 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
193 }
194 return first;
195 }
196 virtual void
197 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
198 UnicodeString &safeMiddle,
199 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
200 virtual UBool
201 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
202 UChar buffer[4];
203 int32_t length;
204 const UChar *d=impl.getDecomposition(c, buffer, length);
205 if(d==NULL) {
206 return FALSE;
207 }
208 if(d==buffer) {
209 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
210 } else {
211 decomposition.setTo(FALSE, d, length); // read-only alias
212 }
213 return TRUE;
214 }
215 virtual UBool
216 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
217 UChar buffer[30];
218 int32_t length;
219 const UChar *d=impl.getRawDecomposition(c, buffer, length);
220 if(d==NULL) {
221 return FALSE;
222 }
223 if(d==buffer) {
224 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
225 } else {
226 decomposition.setTo(FALSE, d, length); // read-only alias
227 }
228 return TRUE;
229 }
230 virtual UChar32
231 composePair(UChar32 a, UChar32 b) const {
232 return impl.composePair(a, b);
233 }
234
235 virtual uint8_t
236 getCombiningClass(UChar32 c) const {
237 return impl.getCC(impl.getNorm16(c));
238 }
239
240 // quick checks
241 virtual UBool
242 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
243 if(U_FAILURE(errorCode)) {
244 return FALSE;
245 }
246 const UChar *sArray=s.getBuffer();
247 if(sArray==NULL) {
248 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
249 return FALSE;
250 }
251 const UChar *sLimit=sArray+s.length();
252 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
253 }
254 virtual UNormalizationCheckResult
255 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
256 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
257 }
258 virtual int32_t
259 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
260 if(U_FAILURE(errorCode)) {
261 return 0;
262 }
263 const UChar *sArray=s.getBuffer();
264 if(sArray==NULL) {
265 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
266 return 0;
267 }
268 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
269 }
270 virtual const UChar *
271 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
272
273 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
274 return UNORM_YES;
275 }
276
277 const Normalizer2Impl &impl;
278 };
279
280 Normalizer2WithImpl::~Normalizer2WithImpl() {}
281
282 class DecomposeNormalizer2 : public Normalizer2WithImpl {
283 public:
284 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
285 virtual ~DecomposeNormalizer2();
286
287 private:
288 virtual void
289 normalize(const UChar *src, const UChar *limit,
290 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
291 impl.decompose(src, limit, &buffer, errorCode);
292 }
293 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
294 virtual void
295 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
296 UnicodeString &safeMiddle,
297 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
298 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
299 }
300 virtual const UChar *
301 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
302 return impl.decompose(src, limit, NULL, errorCode);
303 }
304 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
305 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
306 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
307 }
308 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
309 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
310 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
311 };
312
313 DecomposeNormalizer2::~DecomposeNormalizer2() {}
314
315 class ComposeNormalizer2 : public Normalizer2WithImpl {
316 public:
317 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
318 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
319 virtual ~ComposeNormalizer2();
320
321 private:
322 virtual void
323 normalize(const UChar *src, const UChar *limit,
324 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
325 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
326 }
327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
328 virtual void
329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
330 UnicodeString &safeMiddle,
331 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
332 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
333 }
334
335 virtual UBool
336 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
337 if(U_FAILURE(errorCode)) {
338 return FALSE;
339 }
340 const UChar *sArray=s.getBuffer();
341 if(sArray==NULL) {
342 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
343 return FALSE;
344 }
345 UnicodeString temp;
346 ReorderingBuffer buffer(impl, temp);
347 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
348 return FALSE;
349 }
350 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
351 }
352 virtual UNormalizationCheckResult
353 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
354 if(U_FAILURE(errorCode)) {
355 return UNORM_MAYBE;
356 }
357 const UChar *sArray=s.getBuffer();
358 if(sArray==NULL) {
359 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
360 return UNORM_MAYBE;
361 }
362 UNormalizationCheckResult qcResult=UNORM_YES;
363 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
364 return qcResult;
365 }
366 virtual const UChar *
367 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
368 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
369 }
370 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
371 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
372 return impl.getCompQuickCheck(impl.getNorm16(c));
373 }
374 virtual UBool hasBoundaryBefore(UChar32 c) const {
375 return impl.hasCompBoundaryBefore(c);
376 }
377 virtual UBool hasBoundaryAfter(UChar32 c) const {
378 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
379 }
380 virtual UBool isInert(UChar32 c) const {
381 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
382 }
383
384 const UBool onlyContiguous;
385 };
386
387 ComposeNormalizer2::~ComposeNormalizer2() {}
388
389 class FCDNormalizer2 : public Normalizer2WithImpl {
390 public:
391 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
392 virtual ~FCDNormalizer2();
393
394 private:
395 virtual void
396 normalize(const UChar *src, const UChar *limit,
397 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
398 impl.makeFCD(src, limit, &buffer, errorCode);
399 }
400 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
401 virtual void
402 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
403 UnicodeString &safeMiddle,
404 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
405 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
406 }
407 virtual const UChar *
408 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
409 return impl.makeFCD(src, limit, NULL, errorCode);
410 }
411 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
412 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
413 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
414 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
415 };
416
417 FCDNormalizer2::~FCDNormalizer2() {}
418
419 // instance cache ---------------------------------------------------------- ***
420
421 struct Norm2AllModes : public UMemory {
422 static Norm2AllModes *createInstance(const char *packageName,
423 const char *name,
424 UErrorCode &errorCode);
425 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
426
427 Normalizer2Impl impl;
428 ComposeNormalizer2 comp;
429 DecomposeNormalizer2 decomp;
430 FCDNormalizer2 fcd;
431 ComposeNormalizer2 fcc;
432 };
433
434 Norm2AllModes *
435 Norm2AllModes::createInstance(const char *packageName,
436 const char *name,
437 UErrorCode &errorCode) {
438 if(U_FAILURE(errorCode)) {
439 return NULL;
440 }
441 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
442 if(allModes.isNull()) {
443 errorCode=U_MEMORY_ALLOCATION_ERROR;
444 return NULL;
445 }
446 allModes->impl.load(packageName, name, errorCode);
447 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
448 }
449
450 U_CDECL_BEGIN
451 static UBool U_CALLCONV uprv_normalizer2_cleanup();
452 U_CDECL_END
453
454
455 static Norm2AllModes *nfcSingleton;
456 static Norm2AllModes *nfkcSingleton;
457 static Norm2AllModes *nfkc_cfSingleton;
458 static Normalizer2 *noopSingleton;
459 static UHashtable *cache=NULL;
460
461 static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
462 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
463 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
464 static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
465
466 // UInitOnce singleton initialization function
467 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
468 if (uprv_strcmp(what, "nfc") == 0) {
469 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
470 } else if (uprv_strcmp(what, "nfkc") == 0) {
471 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
472 } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
473 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
474 } else if (uprv_strcmp(what, "noop") == 0) {
475 noopSingleton = new NoopNormalizer2;
476 } else {
477 U_ASSERT(FALSE); // Unknown singleton
478 }
479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
480 }
481
482 U_CDECL_BEGIN
483
484 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
485 delete (Norm2AllModes *)allModes;
486 }
487
488 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
489 delete nfcSingleton;
490 nfcSingleton = NULL;
491 delete nfkcSingleton;
492 nfkcSingleton = NULL;
493 delete nfkc_cfSingleton;
494 nfkc_cfSingleton = NULL;
495 delete noopSingleton;
496 noopSingleton = NULL;
497 uhash_close(cache);
498 cache=NULL;
499 nfcInitOnce.reset();
500 nfkcInitOnce.reset();
501 nfkc_cfInitOnce.reset();
502 noopInitOnce.reset();
503 return TRUE;
504 }
505
506 U_CDECL_END
507
508 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
509 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
510 return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
511 }
512
513 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
514 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
515 return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
516 }
517
518 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
519 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
520 return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
521 }
522
523 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
524 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
525 return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
526 }
527
528 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
529 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
530 return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
531 }
532
533 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
534 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
535 return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
536 }
537
538 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
539 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
540 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
541 }
542
543 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
544 umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
545 return noopSingleton;
546 }
547
548 const Normalizer2 *
549 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
550 if(U_FAILURE(errorCode)) {
551 return NULL;
552 }
553 switch(mode) {
554 case UNORM_NFD:
555 return getNFDInstance(errorCode);
556 case UNORM_NFKD:
557 return getNFKDInstance(errorCode);
558 case UNORM_NFC:
559 return getNFCInstance(errorCode);
560 case UNORM_NFKC:
561 return getNFKCInstance(errorCode);
562 case UNORM_FCD:
563 return getFCDInstance(errorCode);
564 default: // UNORM_NONE
565 return getNoopInstance(errorCode);
566 }
567 }
568
569 const Normalizer2Impl *
570 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
571 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
572 return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
573 }
574
575 const Normalizer2Impl *
576 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
577 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
578 return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
579 }
580
581 const Normalizer2Impl *
582 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
583 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
584 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
585 }
586
587 const Normalizer2Impl *
588 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
589 return &((Normalizer2WithImpl *)norm2)->impl;
590 }
591
592 const Normalizer2 *
593 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
594 return Normalizer2Factory::getNFCInstance(errorCode);
595 }
596
597 const Normalizer2 *
598 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
599 return Normalizer2Factory::getNFDInstance(errorCode);
600 }
601
602 const Normalizer2 *
603 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
604 return Normalizer2Factory::getNFKCInstance(errorCode);
605 }
606
607 const Normalizer2 *
608 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
609 return Normalizer2Factory::getNFKDInstance(errorCode);
610 }
611
612 const Normalizer2 *
613 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
614 return Normalizer2Factory::getNFKC_CFInstance(errorCode);
615 }
616
617 const Normalizer2 *
618 Normalizer2::getInstance(const char *packageName,
619 const char *name,
620 UNormalization2Mode mode,
621 UErrorCode &errorCode) {
622 if(U_FAILURE(errorCode)) {
623 return NULL;
624 }
625 if(name==NULL || *name==0) {
626 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
627 return NULL;
628 }
629 Norm2AllModes *allModes=NULL;
630 if(packageName==NULL) {
631 if(0==uprv_strcmp(name, "nfc")) {
632 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
633 allModes=nfcSingleton;
634 } else if(0==uprv_strcmp(name, "nfkc")) {
635 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
636 allModes=nfkcSingleton;
637 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
638 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
639 allModes=nfkc_cfSingleton;
640 }
641 }
642 if(allModes==NULL && U_SUCCESS(errorCode)) {
643 {
644 Mutex lock;
645 if(cache!=NULL) {
646 allModes=(Norm2AllModes *)uhash_get(cache, name);
647 }
648 }
649 if(allModes==NULL) {
650 LocalPointer<Norm2AllModes> localAllModes(
651 Norm2AllModes::createInstance(packageName, name, errorCode));
652 if(U_SUCCESS(errorCode)) {
653 Mutex lock;
654 if(cache==NULL) {
655 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
656 if(U_FAILURE(errorCode)) {
657 return NULL;
658 }
659 uhash_setKeyDeleter(cache, uprv_free);
660 uhash_setValueDeleter(cache, deleteNorm2AllModes);
661 }
662 void *temp=uhash_get(cache, name);
663 if(temp==NULL) {
664 int32_t keyLength=uprv_strlen(name)+1;
665 char *nameCopy=(char *)uprv_malloc(keyLength);
666 if(nameCopy==NULL) {
667 errorCode=U_MEMORY_ALLOCATION_ERROR;
668 return NULL;
669 }
670 uprv_memcpy(nameCopy, name, keyLength);
671 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
672 } else {
673 // race condition
674 allModes=(Norm2AllModes *)temp;
675 }
676 }
677 }
678 }
679 if(allModes!=NULL && U_SUCCESS(errorCode)) {
680 switch(mode) {
681 case UNORM2_COMPOSE:
682 return &allModes->comp;
683 case UNORM2_DECOMPOSE:
684 return &allModes->decomp;
685 case UNORM2_FCD:
686 return &allModes->fcd;
687 case UNORM2_COMPOSE_CONTIGUOUS:
688 return &allModes->fcc;
689 default:
690 break; // do nothing
691 }
692 }
693 return NULL;
694 }
695
696 U_NAMESPACE_END
697
698 // C API ------------------------------------------------------------------- ***
699
700 U_NAMESPACE_USE
701
702 U_CAPI const UNormalizer2 * U_EXPORT2
703 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
704 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
705 }
706
707 U_CAPI const UNormalizer2 * U_EXPORT2
708 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
709 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
710 }
711
712 U_CAPI const UNormalizer2 * U_EXPORT2
713 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
714 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
715 }
716
717 U_CAPI const UNormalizer2 * U_EXPORT2
718 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
719 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
720 }
721
722 U_CAPI const UNormalizer2 * U_EXPORT2
723 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
724 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
725 }
726
727 U_CAPI const UNormalizer2 * U_EXPORT2
728 unorm2_getInstance(const char *packageName,
729 const char *name,
730 UNormalization2Mode mode,
731 UErrorCode *pErrorCode) {
732 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
733 }
734
735 U_CAPI void U_EXPORT2
736 unorm2_close(UNormalizer2 *norm2) {
737 delete (Normalizer2 *)norm2;
738 }
739
740 U_CAPI int32_t U_EXPORT2
741 unorm2_normalize(const UNormalizer2 *norm2,
742 const UChar *src, int32_t length,
743 UChar *dest, int32_t capacity,
744 UErrorCode *pErrorCode) {
745 if(U_FAILURE(*pErrorCode)) {
746 return 0;
747 }
748 if( (src==NULL ? length!=0 : length<-1) ||
749 (dest==NULL ? capacity!=0 : capacity<0) ||
750 (src==dest && src!=NULL)
751 ) {
752 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
753 return 0;
754 }
755 UnicodeString destString(dest, 0, capacity);
756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
757 if(length!=0) {
758 const Normalizer2 *n2=(const Normalizer2 *)norm2;
759 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
760 if(n2wi!=NULL) {
761 // Avoid duplicate argument checking and support NUL-terminated src.
762 ReorderingBuffer buffer(n2wi->impl, destString);
763 if(buffer.init(length, *pErrorCode)) {
764 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
765 }
766 } else {
767 UnicodeString srcString(length<0, src, length);
768 n2->normalize(srcString, destString, *pErrorCode);
769 }
770 }
771 return destString.extract(dest, capacity, *pErrorCode);
772 }
773
774 static int32_t
775 normalizeSecondAndAppend(const UNormalizer2 *norm2,
776 UChar *first, int32_t firstLength, int32_t firstCapacity,
777 const UChar *second, int32_t secondLength,
778 UBool doNormalize,
779 UErrorCode *pErrorCode) {
780 if(U_FAILURE(*pErrorCode)) {
781 return 0;
782 }
783 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
784 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
785 (firstCapacity<0 || firstLength<-1)) ||
786 (first==second && first!=NULL)
787 ) {
788 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
789 return 0;
790 }
791 UnicodeString firstString(first, firstLength, firstCapacity);
792 firstLength=firstString.length(); // In case it was -1.
793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
794 if(secondLength!=0) {
795 const Normalizer2 *n2=(const Normalizer2 *)norm2;
796 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
797 if(n2wi!=NULL) {
798 // Avoid duplicate argument checking and support NUL-terminated src.
799 UnicodeString safeMiddle;
800 {
801 ReorderingBuffer buffer(n2wi->impl, firstString);
802 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
803 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
804 doNormalize, safeMiddle, buffer, *pErrorCode);
805 }
806 } // The ReorderingBuffer destructor finalizes firstString.
807 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
808 // Restore the modified suffix of the first string.
809 // This does not restore first[] array contents between firstLength and firstCapacity.
810 // (That might be uninitialized memory, as far as we know.)
811 if(first!=NULL) { /* don't dereference NULL */
812 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
813 if(firstLength<firstCapacity) {
814 first[firstLength]=0; // NUL-terminate in case it was originally.
815 }
816 }
817 }
818 } else {
819 UnicodeString secondString(secondLength<0, second, secondLength);
820 if(doNormalize) {
821 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
822 } else {
823 n2->append(firstString, secondString, *pErrorCode);
824 }
825 }
826 }
827 return firstString.extract(first, firstCapacity, *pErrorCode);
828 }
829
830 U_CAPI int32_t U_EXPORT2
831 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
832 UChar *first, int32_t firstLength, int32_t firstCapacity,
833 const UChar *second, int32_t secondLength,
834 UErrorCode *pErrorCode) {
835 return normalizeSecondAndAppend(norm2,
836 first, firstLength, firstCapacity,
837 second, secondLength,
838 TRUE, pErrorCode);
839 }
840
841 U_CAPI int32_t U_EXPORT2
842 unorm2_append(const UNormalizer2 *norm2,
843 UChar *first, int32_t firstLength, int32_t firstCapacity,
844 const UChar *second, int32_t secondLength,
845 UErrorCode *pErrorCode) {
846 return normalizeSecondAndAppend(norm2,
847 first, firstLength, firstCapacity,
848 second, secondLength,
849 FALSE, pErrorCode);
850 }
851
852 U_CAPI int32_t U_EXPORT2
853 unorm2_getDecomposition(const UNormalizer2 *norm2,
854 UChar32 c, UChar *decomposition, int32_t capacity,
855 UErrorCode *pErrorCode) {
856 if(U_FAILURE(*pErrorCode)) {
857 return 0;
858 }
859 if(decomposition==NULL ? capacity!=0 : capacity<0) {
860 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
861 return 0;
862 }
863 UnicodeString destString(decomposition, 0, capacity);
864 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
865 return destString.extract(decomposition, capacity, *pErrorCode);
866 } else {
867 return -1;
868 }
869 }
870
871 U_CAPI int32_t U_EXPORT2
872 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
873 UChar32 c, UChar *decomposition, int32_t capacity,
874 UErrorCode *pErrorCode) {
875 if(U_FAILURE(*pErrorCode)) {
876 return 0;
877 }
878 if(decomposition==NULL ? capacity!=0 : capacity<0) {
879 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
880 return 0;
881 }
882 UnicodeString destString(decomposition, 0, capacity);
883 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
884 return destString.extract(decomposition, capacity, *pErrorCode);
885 } else {
886 return -1;
887 }
888 }
889
890 U_CAPI UChar32 U_EXPORT2
891 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
892 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
893 }
894
895 U_CAPI uint8_t U_EXPORT2
896 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
897 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
898 }
899
900 U_CAPI UBool U_EXPORT2
901 unorm2_isNormalized(const UNormalizer2 *norm2,
902 const UChar *s, int32_t length,
903 UErrorCode *pErrorCode) {
904 if(U_FAILURE(*pErrorCode)) {
905 return 0;
906 }
907 if((s==NULL && length!=0) || length<-1) {
908 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
909 return 0;
910 }
911 UnicodeString sString(length<0, s, length);
912 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
913 }
914
915 U_CAPI UNormalizationCheckResult U_EXPORT2
916 unorm2_quickCheck(const UNormalizer2 *norm2,
917 const UChar *s, int32_t length,
918 UErrorCode *pErrorCode) {
919 if(U_FAILURE(*pErrorCode)) {
920 return UNORM_NO;
921 }
922 if((s==NULL && length!=0) || length<-1) {
923 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
924 return UNORM_NO;
925 }
926 UnicodeString sString(length<0, s, length);
927 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
928 }
929
930 U_CAPI int32_t U_EXPORT2
931 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
932 const UChar *s, int32_t length,
933 UErrorCode *pErrorCode) {
934 if(U_FAILURE(*pErrorCode)) {
935 return 0;
936 }
937 if((s==NULL && length!=0) || length<-1) {
938 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
939 return 0;
940 }
941 UnicodeString sString(length<0, s, length);
942 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
943 }
944
945 U_CAPI UBool U_EXPORT2
946 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
947 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
948 }
949
950 U_CAPI UBool U_EXPORT2
951 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
952 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
953 }
954
955 U_CAPI UBool U_EXPORT2
956 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
957 return ((const Normalizer2 *)norm2)->isInert(c);
958 }
959
960 // Some properties APIs ---------------------------------------------------- ***
961
962 U_CAPI uint8_t U_EXPORT2
963 u_getCombiningClass(UChar32 c) {
964 UErrorCode errorCode=U_ZERO_ERROR;
965 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
966 if(U_SUCCESS(errorCode)) {
967 return nfd->getCombiningClass(c);
968 } else {
969 return 0;
970 }
971 }
972
973 U_CFUNC UNormalizationCheckResult
974 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
975 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
976 return UNORM_YES;
977 }
978 UErrorCode errorCode=U_ZERO_ERROR;
979 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
980 if(U_SUCCESS(errorCode)) {
981 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
982 } else {
983 return UNORM_MAYBE;
984 }
985 }
986
987 U_CFUNC uint16_t
988 unorm_getFCD16(UChar32 c) {
989 UErrorCode errorCode=U_ZERO_ERROR;
990 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
991 if(U_SUCCESS(errorCode)) {
992 return impl->getFCD16(c);
993 } else {
994 return 0;
995 }
996 }
997
998 #endif // !UCONFIG_NO_NORMALIZATION