]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/normalizer2.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / common / normalizer2.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2009-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: normalizer2.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2009nov22
14 * created by: Markus W. Scherer
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_NORMALIZATION
20
21 #include "unicode/localpointer.h"
22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
25 #include "cpputils.h"
26 #include "cstring.h"
27 #include "mutex.h"
28 #include "normalizer2impl.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31
32 U_NAMESPACE_BEGIN
33
34 // Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36 Normalizer2::~Normalizer2() {}
37
38 UBool
39 Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
40 return FALSE;
41 }
42
43 UChar32
44 Normalizer2::composePair(UChar32, UChar32) const {
45 return U_SENTINEL;
46 }
47
48 uint8_t
49 Normalizer2::getCombiningClass(UChar32 /*c*/) const {
50 return 0;
51 }
52
53 // Normalizer2 implementation for the old UNORM_NONE.
54 class NoopNormalizer2 : public Normalizer2 {
55 virtual ~NoopNormalizer2();
56
57 virtual UnicodeString &
58 normalize(const UnicodeString &src,
59 UnicodeString &dest,
60 UErrorCode &errorCode) const {
61 if(U_SUCCESS(errorCode)) {
62 if(&dest!=&src) {
63 dest=src;
64 } else {
65 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
66 }
67 }
68 return dest;
69 }
70 virtual UnicodeString &
71 normalizeSecondAndAppend(UnicodeString &first,
72 const UnicodeString &second,
73 UErrorCode &errorCode) const {
74 if(U_SUCCESS(errorCode)) {
75 if(&first!=&second) {
76 first.append(second);
77 } else {
78 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
79 }
80 }
81 return first;
82 }
83 virtual UnicodeString &
84 append(UnicodeString &first,
85 const UnicodeString &second,
86 UErrorCode &errorCode) const {
87 if(U_SUCCESS(errorCode)) {
88 if(&first!=&second) {
89 first.append(second);
90 } else {
91 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
92 }
93 }
94 return first;
95 }
96 virtual UBool
97 getDecomposition(UChar32, UnicodeString &) const {
98 return FALSE;
99 }
100 // No need to override the default getRawDecomposition().
101 virtual UBool
102 isNormalized(const UnicodeString &, UErrorCode &) const {
103 return TRUE;
104 }
105 virtual UNormalizationCheckResult
106 quickCheck(const UnicodeString &, UErrorCode &) const {
107 return UNORM_YES;
108 }
109 virtual int32_t
110 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
111 return s.length();
112 }
113 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
114 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
115 virtual UBool isInert(UChar32) const { return TRUE; }
116 };
117
118 NoopNormalizer2::~NoopNormalizer2() {}
119
120 // Intermediate class:
121 // Has Normalizer2Impl and does boilerplate argument checking and setup.
122 class Normalizer2WithImpl : public Normalizer2 {
123 public:
124 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
125 virtual ~Normalizer2WithImpl();
126
127 // normalize
128 virtual UnicodeString &
129 normalize(const UnicodeString &src,
130 UnicodeString &dest,
131 UErrorCode &errorCode) const {
132 if(U_FAILURE(errorCode)) {
133 dest.setToBogus();
134 return dest;
135 }
136 const UChar *sArray=src.getBuffer();
137 if(&dest==&src || sArray==NULL) {
138 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
139 dest.setToBogus();
140 return dest;
141 }
142 dest.remove();
143 ReorderingBuffer buffer(impl, dest);
144 if(buffer.init(src.length(), errorCode)) {
145 normalize(sArray, sArray+src.length(), buffer, errorCode);
146 }
147 return dest;
148 }
149 virtual void
150 normalize(const UChar *src, const UChar *limit,
151 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
152
153 // normalize and append
154 virtual UnicodeString &
155 normalizeSecondAndAppend(UnicodeString &first,
156 const UnicodeString &second,
157 UErrorCode &errorCode) const {
158 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
159 }
160 virtual UnicodeString &
161 append(UnicodeString &first,
162 const UnicodeString &second,
163 UErrorCode &errorCode) const {
164 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
165 }
166 UnicodeString &
167 normalizeSecondAndAppend(UnicodeString &first,
168 const UnicodeString &second,
169 UBool doNormalize,
170 UErrorCode &errorCode) const {
171 uprv_checkCanGetBuffer(first, errorCode);
172 if(U_FAILURE(errorCode)) {
173 return first;
174 }
175 const UChar *secondArray=second.getBuffer();
176 if(&first==&second || secondArray==NULL) {
177 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
178 return first;
179 }
180 int32_t firstLength=first.length();
181 UnicodeString safeMiddle;
182 {
183 ReorderingBuffer buffer(impl, first);
184 if(buffer.init(firstLength+second.length(), errorCode)) {
185 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
186 safeMiddle, buffer, errorCode);
187 }
188 } // The ReorderingBuffer destructor finalizes the first string.
189 if(U_FAILURE(errorCode)) {
190 // Restore the modified suffix of the first string.
191 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
192 }
193 return first;
194 }
195 virtual void
196 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
197 UnicodeString &safeMiddle,
198 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
199 virtual UBool
200 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
201 UChar buffer[4];
202 int32_t length;
203 const UChar *d=impl.getDecomposition(c, buffer, length);
204 if(d==NULL) {
205 return FALSE;
206 }
207 if(d==buffer) {
208 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
209 } else {
210 decomposition.setTo(FALSE, d, length); // read-only alias
211 }
212 return TRUE;
213 }
214 virtual UBool
215 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
216 UChar buffer[30];
217 int32_t length;
218 const UChar *d=impl.getRawDecomposition(c, buffer, length);
219 if(d==NULL) {
220 return FALSE;
221 }
222 if(d==buffer) {
223 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
224 } else {
225 decomposition.setTo(FALSE, d, length); // read-only alias
226 }
227 return TRUE;
228 }
229 virtual UChar32
230 composePair(UChar32 a, UChar32 b) const {
231 return impl.composePair(a, b);
232 }
233
234 virtual uint8_t
235 getCombiningClass(UChar32 c) const {
236 return impl.getCC(impl.getNorm16(c));
237 }
238
239 // quick checks
240 virtual UBool
241 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
242 if(U_FAILURE(errorCode)) {
243 return FALSE;
244 }
245 const UChar *sArray=s.getBuffer();
246 if(sArray==NULL) {
247 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
248 return FALSE;
249 }
250 const UChar *sLimit=sArray+s.length();
251 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
252 }
253 virtual UNormalizationCheckResult
254 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
255 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
256 }
257 virtual int32_t
258 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
259 if(U_FAILURE(errorCode)) {
260 return 0;
261 }
262 const UChar *sArray=s.getBuffer();
263 if(sArray==NULL) {
264 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
265 return 0;
266 }
267 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
268 }
269 virtual const UChar *
270 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
271
272 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
273 return UNORM_YES;
274 }
275
276 const Normalizer2Impl &impl;
277 };
278
279 Normalizer2WithImpl::~Normalizer2WithImpl() {}
280
281 class DecomposeNormalizer2 : public Normalizer2WithImpl {
282 public:
283 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
284 virtual ~DecomposeNormalizer2();
285
286 private:
287 virtual void
288 normalize(const UChar *src, const UChar *limit,
289 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
290 impl.decompose(src, limit, &buffer, errorCode);
291 }
292 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
293 virtual void
294 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
295 UnicodeString &safeMiddle,
296 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
297 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
298 }
299 virtual const UChar *
300 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
301 return impl.decompose(src, limit, NULL, errorCode);
302 }
303 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
304 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
305 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
306 }
307 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
308 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
309 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
310 };
311
312 DecomposeNormalizer2::~DecomposeNormalizer2() {}
313
314 class ComposeNormalizer2 : public Normalizer2WithImpl {
315 public:
316 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
317 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
318 virtual ~ComposeNormalizer2();
319
320 private:
321 virtual void
322 normalize(const UChar *src, const UChar *limit,
323 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
324 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
325 }
326 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
327 virtual void
328 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
329 UnicodeString &safeMiddle,
330 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
331 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
332 }
333
334 virtual UBool
335 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
336 if(U_FAILURE(errorCode)) {
337 return FALSE;
338 }
339 const UChar *sArray=s.getBuffer();
340 if(sArray==NULL) {
341 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
342 return FALSE;
343 }
344 UnicodeString temp;
345 ReorderingBuffer buffer(impl, temp);
346 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
347 return FALSE;
348 }
349 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
350 }
351 virtual UNormalizationCheckResult
352 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
353 if(U_FAILURE(errorCode)) {
354 return UNORM_MAYBE;
355 }
356 const UChar *sArray=s.getBuffer();
357 if(sArray==NULL) {
358 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
359 return UNORM_MAYBE;
360 }
361 UNormalizationCheckResult qcResult=UNORM_YES;
362 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
363 return qcResult;
364 }
365 virtual const UChar *
366 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
367 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
368 }
369 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
370 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
371 return impl.getCompQuickCheck(impl.getNorm16(c));
372 }
373 virtual UBool hasBoundaryBefore(UChar32 c) const {
374 return impl.hasCompBoundaryBefore(c);
375 }
376 virtual UBool hasBoundaryAfter(UChar32 c) const {
377 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
378 }
379 virtual UBool isInert(UChar32 c) const {
380 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
381 }
382
383 const UBool onlyContiguous;
384 };
385
386 ComposeNormalizer2::~ComposeNormalizer2() {}
387
388 class FCDNormalizer2 : public Normalizer2WithImpl {
389 public:
390 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
391 virtual ~FCDNormalizer2();
392
393 private:
394 virtual void
395 normalize(const UChar *src, const UChar *limit,
396 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
397 impl.makeFCD(src, limit, &buffer, errorCode);
398 }
399 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
400 virtual void
401 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
402 UnicodeString &safeMiddle,
403 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
404 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
405 }
406 virtual const UChar *
407 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
408 return impl.makeFCD(src, limit, NULL, errorCode);
409 }
410 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
411 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
412 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
413 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
414 };
415
416 FCDNormalizer2::~FCDNormalizer2() {}
417
418 // instance cache ---------------------------------------------------------- ***
419
420 struct Norm2AllModes : public UMemory {
421 static Norm2AllModes *createInstance(const char *packageName,
422 const char *name,
423 UErrorCode &errorCode);
424 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
425
426 Normalizer2Impl impl;
427 ComposeNormalizer2 comp;
428 DecomposeNormalizer2 decomp;
429 FCDNormalizer2 fcd;
430 ComposeNormalizer2 fcc;
431 };
432
433 Norm2AllModes *
434 Norm2AllModes::createInstance(const char *packageName,
435 const char *name,
436 UErrorCode &errorCode) {
437 if(U_FAILURE(errorCode)) {
438 return NULL;
439 }
440 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
441 if(allModes.isNull()) {
442 errorCode=U_MEMORY_ALLOCATION_ERROR;
443 return NULL;
444 }
445 allModes->impl.load(packageName, name, errorCode);
446 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
447 }
448
449 U_CDECL_BEGIN
450 static UBool U_CALLCONV uprv_normalizer2_cleanup();
451 U_CDECL_END
452
453 class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
454 public:
455 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
456 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
457 Norm2AllModes *getInstance(UErrorCode &errorCode) {
458 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
459 }
460 private:
461 static void *createInstance(const void *context, UErrorCode &errorCode) {
462 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
463 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
464 }
465
466 const char *name;
467 };
468
469 STATIC_TRI_STATE_SINGLETON(nfcSingleton);
470 STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
471 STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
472
473 class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
474 public:
475 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
476 Normalizer2 *getInstance(UErrorCode &errorCode) {
477 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
478 }
479 private:
480 static void *createInstance(const void *, UErrorCode &errorCode) {
481 Normalizer2 *noop=new NoopNormalizer2;
482 if(noop==NULL) {
483 errorCode=U_MEMORY_ALLOCATION_ERROR;
484 }
485 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
486 return noop;
487 }
488 };
489
490 STATIC_SIMPLE_SINGLETON(noopSingleton);
491
492 static UHashtable *cache=NULL;
493
494 U_CDECL_BEGIN
495
496 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
497 delete (Norm2AllModes *)allModes;
498 }
499
500 static UBool U_CALLCONV uprv_normalizer2_cleanup() {
501 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
502 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
503 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
504 Norm2Singleton(noopSingleton).deleteInstance();
505 uhash_close(cache);
506 cache=NULL;
507 return TRUE;
508 }
509
510 U_CDECL_END
511
512 const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
513 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
514 return allModes!=NULL ? &allModes->comp : NULL;
515 }
516
517 const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
518 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
519 return allModes!=NULL ? &allModes->decomp : NULL;
520 }
521
522 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
523 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
524 return allModes!=NULL ? &allModes->fcd : NULL;
525 }
526
527 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
528 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
529 return allModes!=NULL ? &allModes->fcc : NULL;
530 }
531
532 const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
533 Norm2AllModes *allModes=
534 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
535 return allModes!=NULL ? &allModes->comp : NULL;
536 }
537
538 const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
539 Norm2AllModes *allModes=
540 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
541 return allModes!=NULL ? &allModes->decomp : NULL;
542 }
543
544 const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
545 Norm2AllModes *allModes=
546 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
547 return allModes!=NULL ? &allModes->comp : NULL;
548 }
549
550 const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
551 return Norm2Singleton(noopSingleton).getInstance(errorCode);
552 }
553
554 const Normalizer2 *
555 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
556 if(U_FAILURE(errorCode)) {
557 return NULL;
558 }
559 switch(mode) {
560 case UNORM_NFD:
561 return getNFDInstance(errorCode);
562 case UNORM_NFKD:
563 return getNFKDInstance(errorCode);
564 case UNORM_NFC:
565 return getNFCInstance(errorCode);
566 case UNORM_NFKC:
567 return getNFKCInstance(errorCode);
568 case UNORM_FCD:
569 return getFCDInstance(errorCode);
570 default: // UNORM_NONE
571 return getNoopInstance(errorCode);
572 }
573 }
574
575 const Normalizer2Impl *
576 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
577 Norm2AllModes *allModes=
578 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
579 return allModes!=NULL ? &allModes->impl : NULL;
580 }
581
582 const Normalizer2Impl *
583 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
584 Norm2AllModes *allModes=
585 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
586 return allModes!=NULL ? &allModes->impl : NULL;
587 }
588
589 const Normalizer2Impl *
590 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
591 Norm2AllModes *allModes=
592 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
593 return allModes!=NULL ? &allModes->impl : NULL;
594 }
595
596 const Normalizer2Impl *
597 Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
598 return &((Normalizer2WithImpl *)norm2)->impl;
599 }
600
601 const Normalizer2 *
602 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
603 return Normalizer2Factory::getNFCInstance(errorCode);
604 }
605
606 const Normalizer2 *
607 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
608 return Normalizer2Factory::getNFDInstance(errorCode);
609 }
610
611 const Normalizer2 *
612 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
613 return Normalizer2Factory::getNFKCInstance(errorCode);
614 }
615
616 const Normalizer2 *
617 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
618 return Normalizer2Factory::getNFKDInstance(errorCode);
619 }
620
621 const Normalizer2 *
622 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
623 return Normalizer2Factory::getNFKC_CFInstance(errorCode);
624 }
625
626 const Normalizer2 *
627 Normalizer2::getInstance(const char *packageName,
628 const char *name,
629 UNormalization2Mode mode,
630 UErrorCode &errorCode) {
631 if(U_FAILURE(errorCode)) {
632 return NULL;
633 }
634 if(name==NULL || *name==0) {
635 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
636 return NULL;
637 }
638 Norm2AllModes *allModes=NULL;
639 if(packageName==NULL) {
640 if(0==uprv_strcmp(name, "nfc")) {
641 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
642 } else if(0==uprv_strcmp(name, "nfkc")) {
643 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
644 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
645 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
646 }
647 }
648 if(allModes==NULL && U_SUCCESS(errorCode)) {
649 {
650 Mutex lock;
651 if(cache!=NULL) {
652 allModes=(Norm2AllModes *)uhash_get(cache, name);
653 }
654 }
655 if(allModes==NULL) {
656 LocalPointer<Norm2AllModes> localAllModes(
657 Norm2AllModes::createInstance(packageName, name, errorCode));
658 if(U_SUCCESS(errorCode)) {
659 Mutex lock;
660 if(cache==NULL) {
661 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
662 if(U_FAILURE(errorCode)) {
663 return NULL;
664 }
665 uhash_setKeyDeleter(cache, uprv_free);
666 uhash_setValueDeleter(cache, deleteNorm2AllModes);
667 }
668 void *temp=uhash_get(cache, name);
669 if(temp==NULL) {
670 int32_t keyLength=uprv_strlen(name)+1;
671 char *nameCopy=(char *)uprv_malloc(keyLength);
672 if(nameCopy==NULL) {
673 errorCode=U_MEMORY_ALLOCATION_ERROR;
674 return NULL;
675 }
676 uprv_memcpy(nameCopy, name, keyLength);
677 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
678 } else {
679 // race condition
680 allModes=(Norm2AllModes *)temp;
681 }
682 }
683 }
684 }
685 if(allModes!=NULL && U_SUCCESS(errorCode)) {
686 switch(mode) {
687 case UNORM2_COMPOSE:
688 return &allModes->comp;
689 case UNORM2_DECOMPOSE:
690 return &allModes->decomp;
691 case UNORM2_FCD:
692 return &allModes->fcd;
693 case UNORM2_COMPOSE_CONTIGUOUS:
694 return &allModes->fcc;
695 default:
696 break; // do nothing
697 }
698 }
699 return NULL;
700 }
701
702 U_NAMESPACE_END
703
704 // C API ------------------------------------------------------------------- ***
705
706 U_NAMESPACE_USE
707
708 U_CAPI const UNormalizer2 * U_EXPORT2
709 unorm2_getNFCInstance(UErrorCode *pErrorCode) {
710 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
711 }
712
713 U_CAPI const UNormalizer2 * U_EXPORT2
714 unorm2_getNFDInstance(UErrorCode *pErrorCode) {
715 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
716 }
717
718 U_CAPI const UNormalizer2 * U_EXPORT2
719 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
720 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
721 }
722
723 U_CAPI const UNormalizer2 * U_EXPORT2
724 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
725 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
726 }
727
728 U_CAPI const UNormalizer2 * U_EXPORT2
729 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
730 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
731 }
732
733 U_CAPI const UNormalizer2 * U_EXPORT2
734 unorm2_getInstance(const char *packageName,
735 const char *name,
736 UNormalization2Mode mode,
737 UErrorCode *pErrorCode) {
738 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
739 }
740
741 U_CAPI void U_EXPORT2
742 unorm2_close(UNormalizer2 *norm2) {
743 delete (Normalizer2 *)norm2;
744 }
745
746 U_CAPI int32_t U_EXPORT2
747 unorm2_normalize(const UNormalizer2 *norm2,
748 const UChar *src, int32_t length,
749 UChar *dest, int32_t capacity,
750 UErrorCode *pErrorCode) {
751 if(U_FAILURE(*pErrorCode)) {
752 return 0;
753 }
754 if( (src==NULL ? length!=0 : length<-1) ||
755 (dest==NULL ? capacity!=0 : capacity<0) ||
756 (src==dest && src!=NULL)
757 ) {
758 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
759 return 0;
760 }
761 UnicodeString destString(dest, 0, capacity);
762 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
763 if(length!=0) {
764 const Normalizer2 *n2=(const Normalizer2 *)norm2;
765 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
766 if(n2wi!=NULL) {
767 // Avoid duplicate argument checking and support NUL-terminated src.
768 ReorderingBuffer buffer(n2wi->impl, destString);
769 if(buffer.init(length, *pErrorCode)) {
770 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
771 }
772 } else {
773 UnicodeString srcString(length<0, src, length);
774 n2->normalize(srcString, destString, *pErrorCode);
775 }
776 }
777 return destString.extract(dest, capacity, *pErrorCode);
778 }
779
780 static int32_t
781 normalizeSecondAndAppend(const UNormalizer2 *norm2,
782 UChar *first, int32_t firstLength, int32_t firstCapacity,
783 const UChar *second, int32_t secondLength,
784 UBool doNormalize,
785 UErrorCode *pErrorCode) {
786 if(U_FAILURE(*pErrorCode)) {
787 return 0;
788 }
789 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
790 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
791 (firstCapacity<0 || firstLength<-1)) ||
792 (first==second && first!=NULL)
793 ) {
794 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
795 return 0;
796 }
797 UnicodeString firstString(first, firstLength, firstCapacity);
798 firstLength=firstString.length(); // In case it was -1.
799 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
800 if(secondLength!=0) {
801 const Normalizer2 *n2=(const Normalizer2 *)norm2;
802 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
803 if(n2wi!=NULL) {
804 // Avoid duplicate argument checking and support NUL-terminated src.
805 UnicodeString safeMiddle;
806 {
807 ReorderingBuffer buffer(n2wi->impl, firstString);
808 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
809 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
810 doNormalize, safeMiddle, buffer, *pErrorCode);
811 }
812 } // The ReorderingBuffer destructor finalizes firstString.
813 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
814 // Restore the modified suffix of the first string.
815 // This does not restore first[] array contents between firstLength and firstCapacity.
816 // (That might be uninitialized memory, as far as we know.)
817 if(first!=NULL) { /* don't dereference NULL */
818 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
819 if(firstLength<firstCapacity) {
820 first[firstLength]=0; // NUL-terminate in case it was originally.
821 }
822 }
823 }
824 } else {
825 UnicodeString secondString(secondLength<0, second, secondLength);
826 if(doNormalize) {
827 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
828 } else {
829 n2->append(firstString, secondString, *pErrorCode);
830 }
831 }
832 }
833 return firstString.extract(first, firstCapacity, *pErrorCode);
834 }
835
836 U_CAPI int32_t U_EXPORT2
837 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
838 UChar *first, int32_t firstLength, int32_t firstCapacity,
839 const UChar *second, int32_t secondLength,
840 UErrorCode *pErrorCode) {
841 return normalizeSecondAndAppend(norm2,
842 first, firstLength, firstCapacity,
843 second, secondLength,
844 TRUE, pErrorCode);
845 }
846
847 U_CAPI int32_t U_EXPORT2
848 unorm2_append(const UNormalizer2 *norm2,
849 UChar *first, int32_t firstLength, int32_t firstCapacity,
850 const UChar *second, int32_t secondLength,
851 UErrorCode *pErrorCode) {
852 return normalizeSecondAndAppend(norm2,
853 first, firstLength, firstCapacity,
854 second, secondLength,
855 FALSE, pErrorCode);
856 }
857
858 U_CAPI int32_t U_EXPORT2
859 unorm2_getDecomposition(const UNormalizer2 *norm2,
860 UChar32 c, UChar *decomposition, int32_t capacity,
861 UErrorCode *pErrorCode) {
862 if(U_FAILURE(*pErrorCode)) {
863 return 0;
864 }
865 if(decomposition==NULL ? capacity!=0 : capacity<0) {
866 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
867 return 0;
868 }
869 UnicodeString destString(decomposition, 0, capacity);
870 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
871 return destString.extract(decomposition, capacity, *pErrorCode);
872 } else {
873 return -1;
874 }
875 }
876
877 U_CAPI int32_t U_EXPORT2
878 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
879 UChar32 c, UChar *decomposition, int32_t capacity,
880 UErrorCode *pErrorCode) {
881 if(U_FAILURE(*pErrorCode)) {
882 return 0;
883 }
884 if(decomposition==NULL ? capacity!=0 : capacity<0) {
885 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
886 return 0;
887 }
888 UnicodeString destString(decomposition, 0, capacity);
889 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
890 return destString.extract(decomposition, capacity, *pErrorCode);
891 } else {
892 return -1;
893 }
894 }
895
896 U_CAPI UChar32 U_EXPORT2
897 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
898 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
899 }
900
901 U_CAPI uint8_t U_EXPORT2
902 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
903 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
904 }
905
906 U_CAPI UBool U_EXPORT2
907 unorm2_isNormalized(const UNormalizer2 *norm2,
908 const UChar *s, int32_t length,
909 UErrorCode *pErrorCode) {
910 if(U_FAILURE(*pErrorCode)) {
911 return 0;
912 }
913 if((s==NULL && length!=0) || length<-1) {
914 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
915 return 0;
916 }
917 UnicodeString sString(length<0, s, length);
918 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
919 }
920
921 U_CAPI UNormalizationCheckResult U_EXPORT2
922 unorm2_quickCheck(const UNormalizer2 *norm2,
923 const UChar *s, int32_t length,
924 UErrorCode *pErrorCode) {
925 if(U_FAILURE(*pErrorCode)) {
926 return UNORM_NO;
927 }
928 if((s==NULL && length!=0) || length<-1) {
929 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
930 return UNORM_NO;
931 }
932 UnicodeString sString(length<0, s, length);
933 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
934 }
935
936 U_CAPI int32_t U_EXPORT2
937 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
938 const UChar *s, int32_t length,
939 UErrorCode *pErrorCode) {
940 if(U_FAILURE(*pErrorCode)) {
941 return 0;
942 }
943 if((s==NULL && length!=0) || length<-1) {
944 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
945 return 0;
946 }
947 UnicodeString sString(length<0, s, length);
948 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
949 }
950
951 U_CAPI UBool U_EXPORT2
952 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
953 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
954 }
955
956 U_CAPI UBool U_EXPORT2
957 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
958 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
959 }
960
961 U_CAPI UBool U_EXPORT2
962 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
963 return ((const Normalizer2 *)norm2)->isInert(c);
964 }
965
966 // Some properties APIs ---------------------------------------------------- ***
967
968 U_CAPI uint8_t U_EXPORT2
969 u_getCombiningClass(UChar32 c) {
970 UErrorCode errorCode=U_ZERO_ERROR;
971 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
972 if(U_SUCCESS(errorCode)) {
973 return nfd->getCombiningClass(c);
974 } else {
975 return 0;
976 }
977 }
978
979 U_CFUNC UNormalizationCheckResult
980 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
981 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
982 return UNORM_YES;
983 }
984 UErrorCode errorCode=U_ZERO_ERROR;
985 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
986 if(U_SUCCESS(errorCode)) {
987 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
988 } else {
989 return UNORM_MAYBE;
990 }
991 }
992
993 U_CFUNC uint16_t
994 unorm_getFCD16(UChar32 c) {
995 UErrorCode errorCode=U_ZERO_ERROR;
996 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
997 if(U_SUCCESS(errorCode)) {
998 return impl->getFCD16(c);
999 } else {
1000 return 0;
1001 }
1002 }
1003
1004 #endif // !UCONFIG_NO_NORMALIZATION