]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/normalizer2.cpp
ICU-551.24.tar.gz
[apple/icu.git] / icuSources / common / normalizer2.cpp
CommitLineData
729e4ab9
A
1/*
2*******************************************************************************
3*
b331163b 4* Copyright (C) 2009-2014, International Business Machines
729e4ab9
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: normalizer2.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2009nov22
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
729e4ab9
A
21#include "unicode/normalizer2.h"
22#include "unicode/unistr.h"
23#include "unicode/unorm.h"
729e4ab9
A
24#include "cstring.h"
25#include "mutex.h"
b331163b 26#include "norm2allmodes.h"
729e4ab9 27#include "normalizer2impl.h"
57a6839d 28#include "uassert.h"
729e4ab9 29#include "ucln_cmn.h"
b331163b
A
30
31using icu::Normalizer2Impl;
32
33// NFC/NFD data machine-generated by gennorm2 --csource
34#include "norm2_nfc_data.h"
729e4ab9
A
35
36U_NAMESPACE_BEGIN
37
38// Public API dispatch via Normalizer2 subclasses -------------------------- ***
39
4388f060
A
40Normalizer2::~Normalizer2() {}
41
42UBool
43Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
44 return FALSE;
45}
46
47UChar32
48Normalizer2::composePair(UChar32, UChar32) const {
49 return U_SENTINEL;
50}
51
52uint8_t
53Normalizer2::getCombiningClass(UChar32 /*c*/) const {
54 return 0;
55}
56
729e4ab9
A
57// Normalizer2 implementation for the old UNORM_NONE.
58class NoopNormalizer2 : public Normalizer2 {
4388f060
A
59 virtual ~NoopNormalizer2();
60
729e4ab9
A
61 virtual UnicodeString &
62 normalize(const UnicodeString &src,
63 UnicodeString &dest,
64 UErrorCode &errorCode) const {
65 if(U_SUCCESS(errorCode)) {
66 if(&dest!=&src) {
67 dest=src;
68 } else {
69 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
70 }
71 }
72 return dest;
73 }
74 virtual UnicodeString &
75 normalizeSecondAndAppend(UnicodeString &first,
76 const UnicodeString &second,
77 UErrorCode &errorCode) const {
78 if(U_SUCCESS(errorCode)) {
79 if(&first!=&second) {
80 first.append(second);
81 } else {
82 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
83 }
84 }
85 return first;
86 }
87 virtual UnicodeString &
88 append(UnicodeString &first,
89 const UnicodeString &second,
90 UErrorCode &errorCode) const {
91 if(U_SUCCESS(errorCode)) {
92 if(&first!=&second) {
93 first.append(second);
94 } else {
95 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
96 }
97 }
98 return first;
99 }
100 virtual UBool
101 getDecomposition(UChar32, UnicodeString &) const {
102 return FALSE;
103 }
4388f060 104 // No need to override the default getRawDecomposition().
729e4ab9
A
105 virtual UBool
106 isNormalized(const UnicodeString &, UErrorCode &) const {
107 return TRUE;
108 }
109 virtual UNormalizationCheckResult
110 quickCheck(const UnicodeString &, UErrorCode &) const {
111 return UNORM_YES;
112 }
113 virtual int32_t
114 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
115 return s.length();
116 }
117 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
118 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
119 virtual UBool isInert(UChar32) const { return TRUE; }
120};
121
4388f060
A
122NoopNormalizer2::~NoopNormalizer2() {}
123
4388f060
A
124Normalizer2WithImpl::~Normalizer2WithImpl() {}
125
4388f060
A
126DecomposeNormalizer2::~DecomposeNormalizer2() {}
127
4388f060
A
128ComposeNormalizer2::~ComposeNormalizer2() {}
129
4388f060
A
130FCDNormalizer2::~FCDNormalizer2() {}
131
729e4ab9
A
132// instance cache ---------------------------------------------------------- ***
133
b331163b
A
134Norm2AllModes::~Norm2AllModes() {
135 delete impl;
136}
729e4ab9
A
137
138Norm2AllModes *
b331163b 139Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
729e4ab9 140 if(U_FAILURE(errorCode)) {
b331163b 141 delete impl;
729e4ab9
A
142 return NULL;
143 }
b331163b
A
144 Norm2AllModes *allModes=new Norm2AllModes(impl);
145 if(allModes==NULL) {
729e4ab9 146 errorCode=U_MEMORY_ALLOCATION_ERROR;
b331163b 147 delete impl;
729e4ab9
A
148 return NULL;
149 }
b331163b
A
150 return allModes;
151}
152
153Norm2AllModes *
154Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
155 if(U_FAILURE(errorCode)) {
156 return NULL;
157 }
158 Normalizer2Impl *impl=new Normalizer2Impl;
159 if(impl==NULL) {
160 errorCode=U_MEMORY_ALLOCATION_ERROR;
161 return NULL;
162 }
163 impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie,
164 norm2_nfc_data_extraData, norm2_nfc_data_smallFCD);
165 return createInstance(impl, errorCode);
729e4ab9
A
166}
167
168U_CDECL_BEGIN
169static UBool U_CALLCONV uprv_normalizer2_cleanup();
170U_CDECL_END
171
57a6839d 172static Norm2AllModes *nfcSingleton;
57a6839d 173static Normalizer2 *noopSingleton;
57a6839d
A
174
175static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
57a6839d
A
176static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
177
b331163b
A
178// UInitOnce singleton initialization functions
179static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
180 nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
181 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
182}
183
184static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
185 if(U_FAILURE(errorCode)) {
186 return;
187 }
188 noopSingleton=new NoopNormalizer2;
189 if(noopSingleton==NULL) {
190 errorCode=U_MEMORY_ALLOCATION_ERROR;
191 return;
57a6839d
A
192 }
193 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
194}
729e4ab9
A
195
196U_CDECL_BEGIN
197
729e4ab9 198static UBool U_CALLCONV uprv_normalizer2_cleanup() {
57a6839d
A
199 delete nfcSingleton;
200 nfcSingleton = NULL;
57a6839d
A
201 delete noopSingleton;
202 noopSingleton = NULL;
57a6839d 203 nfcInitOnce.reset();
57a6839d 204 noopInitOnce.reset();
729e4ab9
A
205 return TRUE;
206}
207
208U_CDECL_END
209
b331163b
A
210const Norm2AllModes *
211Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
212 if(U_FAILURE(errorCode)) { return NULL; }
213 umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode);
214 return nfcSingleton;
729e4ab9
A
215}
216
b331163b
A
217const Normalizer2 *
218Normalizer2::getNFCInstance(UErrorCode &errorCode) {
219 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
220 return allModes!=NULL ? &allModes->comp : NULL;
729e4ab9
A
221}
222
b331163b
A
223const Normalizer2 *
224Normalizer2::getNFDInstance(UErrorCode &errorCode) {
225 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
226 return allModes!=NULL ? &allModes->decomp : NULL;
729e4ab9
A
227}
228
b331163b
A
229const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
230 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
231 return allModes!=NULL ? &allModes->fcd : NULL;
729e4ab9
A
232}
233
b331163b
A
234const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
235 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
236 return allModes!=NULL ? &allModes->fcc : NULL;
729e4ab9
A
237}
238
239const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
b331163b
A
240 if(U_FAILURE(errorCode)) { return NULL; }
241 umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
57a6839d 242 return noopSingleton;
729e4ab9
A
243}
244
729e4ab9
A
245const Normalizer2Impl *
246Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
b331163b
A
247 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
248 return allModes!=NULL ? allModes->impl : NULL;
729e4ab9
A
249}
250
251const Normalizer2Impl *
252Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
253 return &((Normalizer2WithImpl *)norm2)->impl;
254}
255
729e4ab9
A
256U_NAMESPACE_END
257
258// C API ------------------------------------------------------------------- ***
259
260U_NAMESPACE_USE
261
51004dcb 262U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
263unorm2_getNFCInstance(UErrorCode *pErrorCode) {
264 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
265}
266
51004dcb 267U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
268unorm2_getNFDInstance(UErrorCode *pErrorCode) {
269 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
270}
271
51004dcb 272U_CAPI void U_EXPORT2
729e4ab9
A
273unorm2_close(UNormalizer2 *norm2) {
274 delete (Normalizer2 *)norm2;
275}
276
51004dcb 277U_CAPI int32_t U_EXPORT2
729e4ab9
A
278unorm2_normalize(const UNormalizer2 *norm2,
279 const UChar *src, int32_t length,
280 UChar *dest, int32_t capacity,
281 UErrorCode *pErrorCode) {
282 if(U_FAILURE(*pErrorCode)) {
283 return 0;
284 }
285 if( (src==NULL ? length!=0 : length<-1) ||
286 (dest==NULL ? capacity!=0 : capacity<0) ||
287 (src==dest && src!=NULL)
288 ) {
289 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
290 return 0;
291 }
292 UnicodeString destString(dest, 0, capacity);
293 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
294 if(length!=0) {
295 const Normalizer2 *n2=(const Normalizer2 *)norm2;
296 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
297 if(n2wi!=NULL) {
298 // Avoid duplicate argument checking and support NUL-terminated src.
299 ReorderingBuffer buffer(n2wi->impl, destString);
300 if(buffer.init(length, *pErrorCode)) {
301 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
302 }
303 } else {
304 UnicodeString srcString(length<0, src, length);
305 n2->normalize(srcString, destString, *pErrorCode);
306 }
307 }
308 return destString.extract(dest, capacity, *pErrorCode);
309}
310
311static int32_t
312normalizeSecondAndAppend(const UNormalizer2 *norm2,
313 UChar *first, int32_t firstLength, int32_t firstCapacity,
314 const UChar *second, int32_t secondLength,
315 UBool doNormalize,
316 UErrorCode *pErrorCode) {
317 if(U_FAILURE(*pErrorCode)) {
318 return 0;
319 }
320 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
321 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
322 (firstCapacity<0 || firstLength<-1)) ||
323 (first==second && first!=NULL)
324 ) {
325 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
326 return 0;
327 }
328 UnicodeString firstString(first, firstLength, firstCapacity);
4388f060 329 firstLength=firstString.length(); // In case it was -1.
729e4ab9
A
330 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
331 if(secondLength!=0) {
332 const Normalizer2 *n2=(const Normalizer2 *)norm2;
333 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
334 if(n2wi!=NULL) {
335 // Avoid duplicate argument checking and support NUL-terminated src.
4388f060
A
336 UnicodeString safeMiddle;
337 {
338 ReorderingBuffer buffer(n2wi->impl, firstString);
339 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
340 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
341 doNormalize, safeMiddle, buffer, *pErrorCode);
342 }
343 } // The ReorderingBuffer destructor finalizes firstString.
344 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
345 // Restore the modified suffix of the first string.
346 // This does not restore first[] array contents between firstLength and firstCapacity.
347 // (That might be uninitialized memory, as far as we know.)
348 if(first!=NULL) { /* don't dereference NULL */
349 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
350 if(firstLength<firstCapacity) {
351 first[firstLength]=0; // NUL-terminate in case it was originally.
352 }
353 }
729e4ab9
A
354 }
355 } else {
356 UnicodeString secondString(secondLength<0, second, secondLength);
357 if(doNormalize) {
358 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
359 } else {
360 n2->append(firstString, secondString, *pErrorCode);
361 }
362 }
363 }
364 return firstString.extract(first, firstCapacity, *pErrorCode);
365}
366
51004dcb 367U_CAPI int32_t U_EXPORT2
729e4ab9
A
368unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
369 UChar *first, int32_t firstLength, int32_t firstCapacity,
370 const UChar *second, int32_t secondLength,
371 UErrorCode *pErrorCode) {
372 return normalizeSecondAndAppend(norm2,
373 first, firstLength, firstCapacity,
374 second, secondLength,
375 TRUE, pErrorCode);
376}
377
51004dcb 378U_CAPI int32_t U_EXPORT2
729e4ab9
A
379unorm2_append(const UNormalizer2 *norm2,
380 UChar *first, int32_t firstLength, int32_t firstCapacity,
381 const UChar *second, int32_t secondLength,
382 UErrorCode *pErrorCode) {
383 return normalizeSecondAndAppend(norm2,
384 first, firstLength, firstCapacity,
385 second, secondLength,
386 FALSE, pErrorCode);
387}
388
51004dcb 389U_CAPI int32_t U_EXPORT2
729e4ab9
A
390unorm2_getDecomposition(const UNormalizer2 *norm2,
391 UChar32 c, UChar *decomposition, int32_t capacity,
392 UErrorCode *pErrorCode) {
393 if(U_FAILURE(*pErrorCode)) {
394 return 0;
395 }
396 if(decomposition==NULL ? capacity!=0 : capacity<0) {
397 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
398 return 0;
399 }
400 UnicodeString destString(decomposition, 0, capacity);
401 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
402 return destString.extract(decomposition, capacity, *pErrorCode);
403 } else {
404 return -1;
405 }
406}
407
51004dcb 408U_CAPI int32_t U_EXPORT2
4388f060
A
409unorm2_getRawDecomposition(const UNormalizer2 *norm2,
410 UChar32 c, UChar *decomposition, int32_t capacity,
411 UErrorCode *pErrorCode) {
412 if(U_FAILURE(*pErrorCode)) {
413 return 0;
414 }
415 if(decomposition==NULL ? capacity!=0 : capacity<0) {
416 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
417 return 0;
418 }
419 UnicodeString destString(decomposition, 0, capacity);
420 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
421 return destString.extract(decomposition, capacity, *pErrorCode);
422 } else {
423 return -1;
424 }
425}
426
51004dcb 427U_CAPI UChar32 U_EXPORT2
4388f060
A
428unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
429 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
430}
431
51004dcb 432U_CAPI uint8_t U_EXPORT2
4388f060
A
433unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
434 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
435}
436
51004dcb 437U_CAPI UBool U_EXPORT2
729e4ab9
A
438unorm2_isNormalized(const UNormalizer2 *norm2,
439 const UChar *s, int32_t length,
440 UErrorCode *pErrorCode) {
441 if(U_FAILURE(*pErrorCode)) {
442 return 0;
443 }
444 if((s==NULL && length!=0) || length<-1) {
445 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
446 return 0;
447 }
448 UnicodeString sString(length<0, s, length);
449 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
450}
451
51004dcb 452U_CAPI UNormalizationCheckResult U_EXPORT2
729e4ab9
A
453unorm2_quickCheck(const UNormalizer2 *norm2,
454 const UChar *s, int32_t length,
455 UErrorCode *pErrorCode) {
456 if(U_FAILURE(*pErrorCode)) {
457 return UNORM_NO;
458 }
459 if((s==NULL && length!=0) || length<-1) {
460 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
461 return UNORM_NO;
462 }
463 UnicodeString sString(length<0, s, length);
464 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
465}
466
51004dcb 467U_CAPI int32_t U_EXPORT2
729e4ab9
A
468unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
469 const UChar *s, int32_t length,
470 UErrorCode *pErrorCode) {
471 if(U_FAILURE(*pErrorCode)) {
472 return 0;
473 }
474 if((s==NULL && length!=0) || length<-1) {
475 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
476 return 0;
477 }
478 UnicodeString sString(length<0, s, length);
479 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
480}
481
51004dcb 482U_CAPI UBool U_EXPORT2
729e4ab9
A
483unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
484 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
485}
486
51004dcb 487U_CAPI UBool U_EXPORT2
729e4ab9
A
488unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
489 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
490}
491
51004dcb 492U_CAPI UBool U_EXPORT2
729e4ab9
A
493unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
494 return ((const Normalizer2 *)norm2)->isInert(c);
495}
496
497// Some properties APIs ---------------------------------------------------- ***
498
4388f060
A
499U_CAPI uint8_t U_EXPORT2
500u_getCombiningClass(UChar32 c) {
501 UErrorCode errorCode=U_ZERO_ERROR;
b331163b 502 const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
4388f060
A
503 if(U_SUCCESS(errorCode)) {
504 return nfd->getCombiningClass(c);
505 } else {
506 return 0;
507 }
508}
509
4388f060
A
510U_CFUNC uint16_t
511unorm_getFCD16(UChar32 c) {
512 UErrorCode errorCode=U_ZERO_ERROR;
513 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
514 if(U_SUCCESS(errorCode)) {
515 return impl->getFCD16(c);
729e4ab9 516 } else {
4388f060 517 return 0;
729e4ab9
A
518 }
519}
520
521#endif // !UCONFIG_NO_NORMALIZATION