]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/normalizer2.cpp
ICU-531.30.tar.gz
[apple/icu.git] / icuSources / common / normalizer2.cpp
CommitLineData
729e4ab9
A
1/*
2*******************************************************************************
3*
57a6839d 4* Copyright (C) 2009-2013, International Business Machines
729e4ab9
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: normalizer2.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009nov22
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/localpointer.h"
22#include "unicode/normalizer2.h"
23#include "unicode/unistr.h"
24#include "unicode/unorm.h"
25#include "cpputils.h"
26#include "cstring.h"
27#include "mutex.h"
28#include "normalizer2impl.h"
57a6839d 29#include "uassert.h"
729e4ab9
A
30#include "ucln_cmn.h"
31#include "uhash.h"
32
33U_NAMESPACE_BEGIN
34
35// Public API dispatch via Normalizer2 subclasses -------------------------- ***
36
4388f060
A
37Normalizer2::~Normalizer2() {}
38
39UBool
40Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
41 return FALSE;
42}
43
44UChar32
45Normalizer2::composePair(UChar32, UChar32) const {
46 return U_SENTINEL;
47}
48
49uint8_t
50Normalizer2::getCombiningClass(UChar32 /*c*/) const {
51 return 0;
52}
53
729e4ab9
A
54// Normalizer2 implementation for the old UNORM_NONE.
55class NoopNormalizer2 : public Normalizer2 {
4388f060
A
56 virtual ~NoopNormalizer2();
57
729e4ab9
A
58 virtual UnicodeString &
59 normalize(const UnicodeString &src,
60 UnicodeString &dest,
61 UErrorCode &errorCode) const {
62 if(U_SUCCESS(errorCode)) {
63 if(&dest!=&src) {
64 dest=src;
65 } else {
66 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
67 }
68 }
69 return dest;
70 }
71 virtual UnicodeString &
72 normalizeSecondAndAppend(UnicodeString &first,
73 const UnicodeString &second,
74 UErrorCode &errorCode) const {
75 if(U_SUCCESS(errorCode)) {
76 if(&first!=&second) {
77 first.append(second);
78 } else {
79 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
80 }
81 }
82 return first;
83 }
84 virtual UnicodeString &
85 append(UnicodeString &first,
86 const UnicodeString &second,
87 UErrorCode &errorCode) const {
88 if(U_SUCCESS(errorCode)) {
89 if(&first!=&second) {
90 first.append(second);
91 } else {
92 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
93 }
94 }
95 return first;
96 }
97 virtual UBool
98 getDecomposition(UChar32, UnicodeString &) const {
99 return FALSE;
100 }
4388f060 101 // No need to override the default getRawDecomposition().
729e4ab9
A
102 virtual UBool
103 isNormalized(const UnicodeString &, UErrorCode &) const {
104 return TRUE;
105 }
106 virtual UNormalizationCheckResult
107 quickCheck(const UnicodeString &, UErrorCode &) const {
108 return UNORM_YES;
109 }
110 virtual int32_t
111 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
112 return s.length();
113 }
114 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
115 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
116 virtual UBool isInert(UChar32) const { return TRUE; }
117};
118
4388f060
A
119NoopNormalizer2::~NoopNormalizer2() {}
120
729e4ab9
A
121// Intermediate class:
122// Has Normalizer2Impl and does boilerplate argument checking and setup.
// Base class of the concrete normalizers below. It implements the UnicodeString-based
// virtuals of Normalizer2 (argument checks, ReorderingBuffer setup) and forwards the work
// to pure virtual UChar-pointer overloads that each subclass supplies.
123class Normalizer2WithImpl : public Normalizer2 {
124public:
// Keeps a reference to ni (see the impl member at the end of the class); no copy is made.
125 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
4388f060 126 virtual ~Normalizer2WithImpl();
729e4ab9
A
127
128 // normalize
// Normalizes src into dest and returns dest.
// dest is set to bogus if errorCode is already a failure, if dest and src are the same
// object, or if src has no buffer (the latter two also set U_ILLEGAL_ARGUMENT_ERROR).
129 virtual UnicodeString &
130 normalize(const UnicodeString &src,
131 UnicodeString &dest,
132 UErrorCode &errorCode) const {
133 if(U_FAILURE(errorCode)) {
134 dest.setToBogus();
135 return dest;
136 }
137 const UChar *sArray=src.getBuffer();
138 if(&dest==&src || sArray==NULL) {
139 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
140 dest.setToBogus();
141 return dest;
142 }
// Start from an empty destination; the buffer is initialized with src.length() as capacity.
143 dest.remove();
144 ReorderingBuffer buffer(impl, dest);
145 if(buffer.init(src.length(), errorCode)) {
146 normalize(sArray, sArray+src.length(), buffer, errorCode);
147 }
148 return dest;
149 }
// Mode-specific normalization of [src, limit) into buffer; implemented by subclasses.
150 virtual void
151 normalize(const UChar *src, const UChar *limit,
152 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
153
154 // normalize and append
// Public entry point: forwards to the four-argument overload with doNormalize=TRUE.
155 virtual UnicodeString &
156 normalizeSecondAndAppend(UnicodeString &first,
157 const UnicodeString &second,
158 UErrorCode &errorCode) const {
159 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
160 }
// Public entry point: forwards to the four-argument overload with doNormalize=FALSE.
161 virtual UnicodeString &
162 append(UnicodeString &first,
163 const UnicodeString &second,
164 UErrorCode &errorCode) const {
165 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
166 }
// Shared implementation of normalizeSecondAndAppend() and append().
// Sets U_ILLEGAL_ARGUMENT_ERROR if first and second are the same object or second has no buffer.
// If the subclass reports a failure, the end of first is restored from safeMiddle.
167 UnicodeString &
168 normalizeSecondAndAppend(UnicodeString &first,
169 const UnicodeString &second,
170 UBool doNormalize,
171 UErrorCode &errorCode) const {
// NOTE(review): uprv_checkCanGetBuffer() is defined elsewhere; presumably it sets errorCode
// when first cannot provide a writable buffer -- confirm.
172 uprv_checkCanGetBuffer(first, errorCode);
173 if(U_FAILURE(errorCode)) {
174 return first;
175 }
176 const UChar *secondArray=second.getBuffer();
177 if(&first==&second || secondArray==NULL) {
178 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
179 return first;
180 }
4388f060
A
// Remember the original length; it is needed below to locate the modified suffix.
181 int32_t firstLength=first.length();
182 UnicodeString safeMiddle;
183 {
184 ReorderingBuffer buffer(impl, first);
185 if(buffer.init(firstLength+second.length(), errorCode)) {
186 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
187 safeMiddle, buffer, errorCode);
188 }
189 } // The ReorderingBuffer destructor finalizes the first string.
190 if(U_FAILURE(errorCode)) {
191 // Restore the modified suffix of the first string.
192 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
729e4ab9
A
193 }
194 return first;
195 }
// Mode-specific append of [src, limit) to buffer; implemented by subclasses.
// safeMiddle is filled by the implementation and used by the callers to undo changes on failure.
196 virtual void
197 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
4388f060 198 UnicodeString &safeMiddle,
729e4ab9
A
199 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
// Fetches the decomposition of c from impl. Returns FALSE if impl returns NULL.
// The result is copied when impl wrote it into the local buffer, otherwise decomposition
// becomes a read-only alias of the pointer returned by impl.
200 virtual UBool
201 getDecomposition(UChar32 c, UnicodeString &decomposition) const {
202 UChar buffer[4];
203 int32_t length;
204 const UChar *d=impl.getDecomposition(c, buffer, length);
205 if(d==NULL) {
206 return FALSE;
207 }
208 if(d==buffer) {
209 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
210 } else {
211 decomposition.setTo(FALSE, d, length); // read-only alias
212 }
213 return TRUE;
214 }
4388f060
A
// Same pattern as getDecomposition(), but asks impl for the raw decomposition
// and uses a 30-UChar local buffer.
215 virtual UBool
216 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
217 UChar buffer[30];
218 int32_t length;
219 const UChar *d=impl.getRawDecomposition(c, buffer, length);
220 if(d==NULL) {
221 return FALSE;
222 }
223 if(d==buffer) {
224 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
225 } else {
226 decomposition.setTo(FALSE, d, length); // read-only alias
227 }
228 return TRUE;
229 }
// Forwards to impl.composePair().
230 virtual UChar32
231 composePair(UChar32 a, UChar32 b) const {
232 return impl.composePair(a, b);
233 }
234
// Looks up the norm16 value of c in impl and returns the combining class derived from it.
235 virtual uint8_t
236 getCombiningClass(UChar32 c) const {
237 return impl.getCC(impl.getNorm16(c));
238 }
729e4ab9
A
239
240 // quick checks
// Returns TRUE if the pointer-based spanQuickCheckYes() reaches the end of s.
// Returns FALSE on a prior failure or when s has no buffer (U_ILLEGAL_ARGUMENT_ERROR).
241 virtual UBool
242 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
243 if(U_FAILURE(errorCode)) {
244 return FALSE;
245 }
246 const UChar *sArray=s.getBuffer();
247 if(sArray==NULL) {
248 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
249 return FALSE;
250 }
251 const UChar *sLimit=sArray+s.length();
252 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
253 }
// Maps this class's own isNormalized() (explicitly qualified, so not a subclass override)
// to UNORM_YES / UNORM_NO; never returns UNORM_MAYBE.
254 virtual UNormalizationCheckResult
255 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
256 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
257 }
// Returns the offset in s at which the pointer-based spanQuickCheckYes() stopped.
// Returns 0 on a prior failure or when s has no buffer (U_ILLEGAL_ARGUMENT_ERROR).
258 virtual int32_t
259 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
260 if(U_FAILURE(errorCode)) {
261 return 0;
262 }
263 const UChar *sArray=s.getBuffer();
264 if(sArray==NULL) {
265 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
266 return 0;
267 }
268 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
269 }
// Mode-specific span check over [src, limit); implemented by subclasses.
270 virtual const UChar *
271 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
272
// Per-code-point quick check; the default answers UNORM_YES for every code point.
273 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
274 return UNORM_YES;
275 }
276
// The normalization data and algorithms used by this object; held by reference, not owned.
277 const Normalizer2Impl &impl;
278};
279
4388f060
A
280Normalizer2WithImpl::~Normalizer2WithImpl() {}
281
729e4ab9
A
282class DecomposeNormalizer2 : public Normalizer2WithImpl {
283public:
284 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
4388f060 285 virtual ~DecomposeNormalizer2();
729e4ab9
A
286
287private:
288 virtual void
289 normalize(const UChar *src, const UChar *limit,
290 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
291 impl.decompose(src, limit, &buffer, errorCode);
292 }
293 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
294 virtual void
295 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
4388f060 296 UnicodeString &safeMiddle,
729e4ab9 297 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
4388f060 298 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
729e4ab9
A
299 }
300 virtual const UChar *
301 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
302 return impl.decompose(src, limit, NULL, errorCode);
303 }
304 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
305 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
306 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
307 }
308 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
309 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
310 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
311};
312
4388f060
A
313DecomposeNormalizer2::~DecomposeNormalizer2() {}
314
729e4ab9
A
315class ComposeNormalizer2 : public Normalizer2WithImpl {
316public:
317 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
318 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
4388f060 319 virtual ~ComposeNormalizer2();
729e4ab9
A
320
321private:
322 virtual void
323 normalize(const UChar *src, const UChar *limit,
324 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
325 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
326 }
327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
328 virtual void
329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
4388f060 330 UnicodeString &safeMiddle,
729e4ab9 331 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
4388f060 332 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
729e4ab9
A
333 }
334
335 virtual UBool
336 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
337 if(U_FAILURE(errorCode)) {
338 return FALSE;
339 }
340 const UChar *sArray=s.getBuffer();
341 if(sArray==NULL) {
342 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
343 return FALSE;
344 }
345 UnicodeString temp;
346 ReorderingBuffer buffer(impl, temp);
347 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
348 return FALSE;
349 }
350 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
351 }
352 virtual UNormalizationCheckResult
353 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
354 if(U_FAILURE(errorCode)) {
355 return UNORM_MAYBE;
356 }
357 const UChar *sArray=s.getBuffer();
358 if(sArray==NULL) {
359 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
360 return UNORM_MAYBE;
361 }
362 UNormalizationCheckResult qcResult=UNORM_YES;
363 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
364 return qcResult;
365 }
366 virtual const UChar *
367 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
368 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
369 }
370 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
371 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
372 return impl.getCompQuickCheck(impl.getNorm16(c));
373 }
374 virtual UBool hasBoundaryBefore(UChar32 c) const {
375 return impl.hasCompBoundaryBefore(c);
376 }
377 virtual UBool hasBoundaryAfter(UChar32 c) const {
378 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
379 }
380 virtual UBool isInert(UChar32 c) const {
381 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
382 }
383
384 const UBool onlyContiguous;
385};
386
4388f060
A
387ComposeNormalizer2::~ComposeNormalizer2() {}
388
729e4ab9
A
389class FCDNormalizer2 : public Normalizer2WithImpl {
390public:
391 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
4388f060 392 virtual ~FCDNormalizer2();
729e4ab9
A
393
394private:
395 virtual void
396 normalize(const UChar *src, const UChar *limit,
397 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
398 impl.makeFCD(src, limit, &buffer, errorCode);
399 }
400 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
401 virtual void
402 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
4388f060 403 UnicodeString &safeMiddle,
729e4ab9 404 ReorderingBuffer &buffer, UErrorCode &errorCode) const {
4388f060 405 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
729e4ab9
A
406 }
407 virtual const UChar *
408 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
409 return impl.makeFCD(src, limit, NULL, errorCode);
410 }
411 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
412 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
413 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
414 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
415};
416
4388f060
A
417FCDNormalizer2::~FCDNormalizer2() {}
418
729e4ab9
A
419// instance cache ---------------------------------------------------------- ***
420
421struct Norm2AllModes : public UMemory {
422 static Norm2AllModes *createInstance(const char *packageName,
423 const char *name,
424 UErrorCode &errorCode);
425 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
426
427 Normalizer2Impl impl;
428 ComposeNormalizer2 comp;
429 DecomposeNormalizer2 decomp;
430 FCDNormalizer2 fcd;
431 ComposeNormalizer2 fcc;
432};
433
434Norm2AllModes *
435Norm2AllModes::createInstance(const char *packageName,
436 const char *name,
437 UErrorCode &errorCode) {
438 if(U_FAILURE(errorCode)) {
439 return NULL;
440 }
441 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
442 if(allModes.isNull()) {
443 errorCode=U_MEMORY_ALLOCATION_ERROR;
444 return NULL;
445 }
446 allModes->impl.load(packageName, name, errorCode);
447 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
448}
449
450U_CDECL_BEGIN
451static UBool U_CALLCONV uprv_normalizer2_cleanup();
452U_CDECL_END
453
729e4ab9 454
57a6839d
A
// Lazily created instances; each is set by initSingletons() and deleted by uprv_normalizer2_cleanup().
455static Norm2AllModes *nfcSingleton;
456static Norm2AllModes *nfkcSingleton;
457static Norm2AllModes *nfkc_cfSingleton;
458static Normalizer2 *noopSingleton;
// Hash table from data name to Norm2AllModes, created on demand by Normalizer2::getInstance().
459static UHashtable *cache=NULL;
460
// One UInitOnce per singleton above; passed to umtx_initOnce() by the getters.
461static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
462static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
463static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
464static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
465
466// UInitOnce singleton initialization function
467static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
468 if (uprv_strcmp(what, "nfc") == 0) {
469 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
470 } else if (uprv_strcmp(what, "nfkc") == 0) {
471 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
472 } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
473 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
474 } else if (uprv_strcmp(what, "noop") == 0) {
475 noopSingleton = new NoopNormalizer2;
476 } else {
477 U_ASSERT(FALSE); // Unknown singleton
478 }
479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
480}
729e4ab9
A
481
482U_CDECL_BEGIN
483
484static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
485 delete (Norm2AllModes *)allModes;
486}
487
488static UBool U_CALLCONV uprv_normalizer2_cleanup() {
57a6839d
A
489 delete nfcSingleton;
490 nfcSingleton = NULL;
491 delete nfkcSingleton;
492 nfkcSingleton = NULL;
493 delete nfkc_cfSingleton;
494 nfkc_cfSingleton = NULL;
495 delete noopSingleton;
496 noopSingleton = NULL;
729e4ab9
A
497 uhash_close(cache);
498 cache=NULL;
57a6839d
A
499 nfcInitOnce.reset();
500 nfkcInitOnce.reset();
501 nfkc_cfInitOnce.reset();
502 noopInitOnce.reset();
729e4ab9
A
503 return TRUE;
504}
505
506U_CDECL_END
507
508const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
57a6839d
A
509 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
510 return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
729e4ab9
A
511}
512
513const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
57a6839d
A
514 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
515 return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
729e4ab9
A
516}
517
518const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
57a6839d
A
519 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
520 return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
729e4ab9
A
521}
522
523const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
57a6839d
A
524 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
525 return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
729e4ab9
A
526}
527
528const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
57a6839d
A
529 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
530 return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
729e4ab9
A
531}
532
533const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
57a6839d
A
534 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
535 return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
729e4ab9
A
536}
537
538const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
57a6839d
A
539 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
540 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
729e4ab9
A
541}
542
543const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
57a6839d
A
544 umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
545 return noopSingleton;
729e4ab9
A
546}
547
548const Normalizer2 *
549Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
550 if(U_FAILURE(errorCode)) {
551 return NULL;
552 }
553 switch(mode) {
554 case UNORM_NFD:
555 return getNFDInstance(errorCode);
556 case UNORM_NFKD:
557 return getNFKDInstance(errorCode);
558 case UNORM_NFC:
559 return getNFCInstance(errorCode);
560 case UNORM_NFKC:
561 return getNFKCInstance(errorCode);
562 case UNORM_FCD:
563 return getFCDInstance(errorCode);
564 default: // UNORM_NONE
565 return getNoopInstance(errorCode);
566 }
567}
568
569const Normalizer2Impl *
570Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
57a6839d
A
571 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
572 return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
729e4ab9
A
573}
574
575const Normalizer2Impl *
576Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
57a6839d
A
577 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
578 return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
729e4ab9
A
579}
580
581const Normalizer2Impl *
582Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
57a6839d
A
583 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
584 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
729e4ab9
A
585}
586
587const Normalizer2Impl *
588Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
589 return &((Normalizer2WithImpl *)norm2)->impl;
590}
591
4388f060
A
592const Normalizer2 *
593Normalizer2::getNFCInstance(UErrorCode &errorCode) {
594 return Normalizer2Factory::getNFCInstance(errorCode);
595}
596
597const Normalizer2 *
598Normalizer2::getNFDInstance(UErrorCode &errorCode) {
599 return Normalizer2Factory::getNFDInstance(errorCode);
600}
601
602const Normalizer2 *
603Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
604 return Normalizer2Factory::getNFKCInstance(errorCode);
605}
606
607const Normalizer2 *
608Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
609 return Normalizer2Factory::getNFKDInstance(errorCode);
610}
611
612const Normalizer2 *
613Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
614 return Normalizer2Factory::getNFKC_CFInstance(errorCode);
729e4ab9
A
615}
616
617const Normalizer2 *
618Normalizer2::getInstance(const char *packageName,
619 const char *name,
620 UNormalization2Mode mode,
621 UErrorCode &errorCode) {
622 if(U_FAILURE(errorCode)) {
623 return NULL;
624 }
625 if(name==NULL || *name==0) {
626 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
4388f060 627 return NULL;
729e4ab9
A
628 }
629 Norm2AllModes *allModes=NULL;
630 if(packageName==NULL) {
631 if(0==uprv_strcmp(name, "nfc")) {
57a6839d
A
632 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
633 allModes=nfcSingleton;
729e4ab9 634 } else if(0==uprv_strcmp(name, "nfkc")) {
57a6839d
A
635 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
636 allModes=nfkcSingleton;
729e4ab9 637 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
57a6839d
A
638 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
639 allModes=nfkc_cfSingleton;
729e4ab9
A
640 }
641 }
642 if(allModes==NULL && U_SUCCESS(errorCode)) {
643 {
644 Mutex lock;
645 if(cache!=NULL) {
646 allModes=(Norm2AllModes *)uhash_get(cache, name);
647 }
648 }
649 if(allModes==NULL) {
650 LocalPointer<Norm2AllModes> localAllModes(
651 Norm2AllModes::createInstance(packageName, name, errorCode));
652 if(U_SUCCESS(errorCode)) {
653 Mutex lock;
654 if(cache==NULL) {
655 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
656 if(U_FAILURE(errorCode)) {
657 return NULL;
658 }
659 uhash_setKeyDeleter(cache, uprv_free);
660 uhash_setValueDeleter(cache, deleteNorm2AllModes);
661 }
662 void *temp=uhash_get(cache, name);
663 if(temp==NULL) {
664 int32_t keyLength=uprv_strlen(name)+1;
665 char *nameCopy=(char *)uprv_malloc(keyLength);
666 if(nameCopy==NULL) {
667 errorCode=U_MEMORY_ALLOCATION_ERROR;
668 return NULL;
669 }
670 uprv_memcpy(nameCopy, name, keyLength);
671 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
672 } else {
673 // race condition
674 allModes=(Norm2AllModes *)temp;
675 }
676 }
677 }
678 }
679 if(allModes!=NULL && U_SUCCESS(errorCode)) {
680 switch(mode) {
681 case UNORM2_COMPOSE:
682 return &allModes->comp;
683 case UNORM2_DECOMPOSE:
684 return &allModes->decomp;
685 case UNORM2_FCD:
729e4ab9
A
686 return &allModes->fcd;
687 case UNORM2_COMPOSE_CONTIGUOUS:
688 return &allModes->fcc;
689 default:
690 break; // do nothing
691 }
692 }
693 return NULL;
694}
695
729e4ab9
A
696U_NAMESPACE_END
697
698// C API ------------------------------------------------------------------- ***
699
700U_NAMESPACE_USE
701
51004dcb 702U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
703unorm2_getNFCInstance(UErrorCode *pErrorCode) {
704 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
705}
706
51004dcb 707U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
708unorm2_getNFDInstance(UErrorCode *pErrorCode) {
709 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
710}
711
51004dcb 712U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
713unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
714 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
715}
716
51004dcb 717U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
718unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
719 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
720}
721
51004dcb 722U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
723unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
724 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
725}
726
51004dcb 727U_CAPI const UNormalizer2 * U_EXPORT2
729e4ab9
A
728unorm2_getInstance(const char *packageName,
729 const char *name,
730 UNormalization2Mode mode,
731 UErrorCode *pErrorCode) {
732 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
733}
734
51004dcb 735U_CAPI void U_EXPORT2
729e4ab9
A
736unorm2_close(UNormalizer2 *norm2) {
737 delete (Normalizer2 *)norm2;
738}
739
// C API: normalizes src[0..length) into dest[0..capacity) and returns the value of the
// final destString.extract() call. length may be -1 (the code below then passes a NULL limit
// or builds a NUL-terminated alias).
// Returns 0 without doing anything if *pErrorCode is already a failure.
// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for inconsistent pointer/length combinations
// or when src and dest are the same non-NULL pointer.
51004dcb 740U_CAPI int32_t U_EXPORT2
729e4ab9
A
741unorm2_normalize(const UNormalizer2 *norm2,
742 const UChar *src, int32_t length,
743 UChar *dest, int32_t capacity,
744 UErrorCode *pErrorCode) {
745 if(U_FAILURE(*pErrorCode)) {
746 return 0;
747 }
// A NULL pointer is only accepted together with a zero length/capacity.
748 if( (src==NULL ? length!=0 : length<-1) ||
749 (dest==NULL ? capacity!=0 : capacity<0) ||
750 (src==dest && src!=NULL)
751 ) {
752 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
753 return 0;
754 }
// Wrap the caller's dest array (initial length 0) so the output is written into it.
755 UnicodeString destString(dest, 0, capacity);
756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
757 if(length!=0) {
758 const Normalizer2 *n2=(const Normalizer2 *)norm2;
// Normalizer2WithImpl objects take the pointer-based path; anything else goes
// through the generic UnicodeString API.
759 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
760 if(n2wi!=NULL) {
761 // Avoid duplicate argument checking and support NUL-terminated src.
762 ReorderingBuffer buffer(n2wi->impl, destString);
763 if(buffer.init(length, *pErrorCode)) {
764 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
765 }
766 } else {
// Read-only alias of src; the first argument says whether src is NUL-terminated.
767 UnicodeString srcString(length<0, src, length);
768 n2->normalize(srcString, destString, *pErrorCode);
769 }
770 }
771 return destString.extract(dest, capacity, *pErrorCode);
772}
773
// Shared implementation of unorm2_normalizeSecondAndAppend() (doNormalize=TRUE)
// and unorm2_append() (doNormalize=FALSE).
// Appends second[0..secondLength) to first[0..firstLength) inside the caller's array of
// firstCapacity UChars and returns the value of the final firstString.extract() call.
// Either length may be -1.
// Returns 0 without doing anything if *pErrorCode is already a failure.
// Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for inconsistent pointer/length/capacity
// combinations or when first and second are the same non-NULL pointer.
774static int32_t
775normalizeSecondAndAppend(const UNormalizer2 *norm2,
776 UChar *first, int32_t firstLength, int32_t firstCapacity,
777 const UChar *second, int32_t secondLength,
778 UBool doNormalize,
779 UErrorCode *pErrorCode) {
780 if(U_FAILURE(*pErrorCode)) {
781 return 0;
782 }
// A NULL pointer is only accepted together with zero length (and zero capacity for first).
783 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
784 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
785 (firstCapacity<0 || firstLength<-1)) ||
786 (first==second && first!=NULL)
787 ) {
788 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
789 return 0;
790 }
// Wrap the caller's first array so that appended text is written into it.
791 UnicodeString firstString(first, firstLength, firstCapacity);
4388f060 792 firstLength=firstString.length(); // In case it was -1.
729e4ab9
A
793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
794 if(secondLength!=0) {
795 const Normalizer2 *n2=(const Normalizer2 *)norm2;
// Normalizer2WithImpl objects take the pointer-based path; anything else goes
// through the generic UnicodeString API.
796 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
797 if(n2wi!=NULL) {
798 // Avoid duplicate argument checking and support NUL-terminated src.
4388f060
A
// safeMiddle is filled by normalizeAndAppend() and used below to undo changes to first.
799 UnicodeString safeMiddle;
800 {
801 ReorderingBuffer buffer(n2wi->impl, firstString);
802 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
803 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
804 doNormalize, safeMiddle, buffer, *pErrorCode);
805 }
806 } // The ReorderingBuffer destructor finalizes firstString.
// Undo on failure, and also when the result no longer fits into the caller's array.
807 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
808 // Restore the modified suffix of the first string.
809 // This does not restore first[] array contents between firstLength and firstCapacity.
810 // (That might be uninitialized memory, as far as we know.)
811 if(first!=NULL) { /* don't dereference NULL */
812 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
813 if(firstLength<firstCapacity) {
814 first[firstLength]=0; // NUL-terminate in case it was originally.
815 }
816 }
729e4ab9
A
817 }
818 } else {
// Read-only alias of second; the first argument says whether it is NUL-terminated.
819 UnicodeString secondString(secondLength<0, second, secondLength);
820 if(doNormalize) {
821 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
822 } else {
823 n2->append(firstString, secondString, *pErrorCode);
824 }
825 }
826 }
827 return firstString.extract(first, firstCapacity, *pErrorCode);
828}
829
51004dcb 830U_CAPI int32_t U_EXPORT2
729e4ab9
A
831unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
832 UChar *first, int32_t firstLength, int32_t firstCapacity,
833 const UChar *second, int32_t secondLength,
834 UErrorCode *pErrorCode) {
835 return normalizeSecondAndAppend(norm2,
836 first, firstLength, firstCapacity,
837 second, secondLength,
838 TRUE, pErrorCode);
839}
840
51004dcb 841U_CAPI int32_t U_EXPORT2
729e4ab9
A
842unorm2_append(const UNormalizer2 *norm2,
843 UChar *first, int32_t firstLength, int32_t firstCapacity,
844 const UChar *second, int32_t secondLength,
845 UErrorCode *pErrorCode) {
846 return normalizeSecondAndAppend(norm2,
847 first, firstLength, firstCapacity,
848 second, secondLength,
849 FALSE, pErrorCode);
850}
851
51004dcb 852U_CAPI int32_t U_EXPORT2
729e4ab9
A
853unorm2_getDecomposition(const UNormalizer2 *norm2,
854 UChar32 c, UChar *decomposition, int32_t capacity,
855 UErrorCode *pErrorCode) {
856 if(U_FAILURE(*pErrorCode)) {
857 return 0;
858 }
859 if(decomposition==NULL ? capacity!=0 : capacity<0) {
860 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
861 return 0;
862 }
863 UnicodeString destString(decomposition, 0, capacity);
864 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
865 return destString.extract(decomposition, capacity, *pErrorCode);
866 } else {
867 return -1;
868 }
869}
870
51004dcb 871U_CAPI int32_t U_EXPORT2
4388f060
A
872unorm2_getRawDecomposition(const UNormalizer2 *norm2,
873 UChar32 c, UChar *decomposition, int32_t capacity,
874 UErrorCode *pErrorCode) {
875 if(U_FAILURE(*pErrorCode)) {
876 return 0;
877 }
878 if(decomposition==NULL ? capacity!=0 : capacity<0) {
879 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
880 return 0;
881 }
882 UnicodeString destString(decomposition, 0, capacity);
883 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
884 return destString.extract(decomposition, capacity, *pErrorCode);
885 } else {
886 return -1;
887 }
888}
889
51004dcb 890U_CAPI UChar32 U_EXPORT2
4388f060
A
891unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
892 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
893}
894
51004dcb 895U_CAPI uint8_t U_EXPORT2
4388f060
A
896unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
897 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
898}
899
51004dcb 900U_CAPI UBool U_EXPORT2
729e4ab9
A
901unorm2_isNormalized(const UNormalizer2 *norm2,
902 const UChar *s, int32_t length,
903 UErrorCode *pErrorCode) {
904 if(U_FAILURE(*pErrorCode)) {
905 return 0;
906 }
907 if((s==NULL && length!=0) || length<-1) {
908 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
909 return 0;
910 }
911 UnicodeString sString(length<0, s, length);
912 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
913}
914
51004dcb 915U_CAPI UNormalizationCheckResult U_EXPORT2
729e4ab9
A
916unorm2_quickCheck(const UNormalizer2 *norm2,
917 const UChar *s, int32_t length,
918 UErrorCode *pErrorCode) {
919 if(U_FAILURE(*pErrorCode)) {
920 return UNORM_NO;
921 }
922 if((s==NULL && length!=0) || length<-1) {
923 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
924 return UNORM_NO;
925 }
926 UnicodeString sString(length<0, s, length);
927 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
928}
929
51004dcb 930U_CAPI int32_t U_EXPORT2
729e4ab9
A
931unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
932 const UChar *s, int32_t length,
933 UErrorCode *pErrorCode) {
934 if(U_FAILURE(*pErrorCode)) {
935 return 0;
936 }
937 if((s==NULL && length!=0) || length<-1) {
938 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
939 return 0;
940 }
941 UnicodeString sString(length<0, s, length);
942 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
943}
944
51004dcb 945U_CAPI UBool U_EXPORT2
729e4ab9
A
946unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
947 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
948}
949
51004dcb 950U_CAPI UBool U_EXPORT2
729e4ab9
A
951unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
952 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
953}
954
51004dcb 955U_CAPI UBool U_EXPORT2
729e4ab9
A
956unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
957 return ((const Normalizer2 *)norm2)->isInert(c);
958}
959
960// Some properties APIs ---------------------------------------------------- ***
961
4388f060
A
962U_CAPI uint8_t U_EXPORT2
963u_getCombiningClass(UChar32 c) {
964 UErrorCode errorCode=U_ZERO_ERROR;
965 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
966 if(U_SUCCESS(errorCode)) {
967 return nfd->getCombiningClass(c);
968 } else {
969 return 0;
970 }
971}
972
973U_CFUNC UNormalizationCheckResult
729e4ab9
A
974unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
975 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
976 return UNORM_YES;
977 }
978 UErrorCode errorCode=U_ZERO_ERROR;
979 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
980 if(U_SUCCESS(errorCode)) {
981 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
982 } else {
983 return UNORM_MAYBE;
984 }
985}
986
4388f060
A
987U_CFUNC uint16_t
988unorm_getFCD16(UChar32 c) {
989 UErrorCode errorCode=U_ZERO_ERROR;
990 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
991 if(U_SUCCESS(errorCode)) {
992 return impl->getFCD16(c);
729e4ab9 993 } else {
4388f060 994 return 0;
729e4ab9
A
995 }
996}
997
998#endif // !UCONFIG_NO_NORMALIZATION