ICU-59131.0.1.tar.gz
[apple/icu.git] / icuSources / common / normalizer2.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4*******************************************************************************
5*
2ca993e8 6* Copyright (C) 2009-2016, International Business Machines
729e4ab9
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: normalizer2.cpp
f3c0d7a5 11* encoding: UTF-8
729e4ab9
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2009nov22
16* created by: Markus W. Scherer
17*/
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_NORMALIZATION
22
729e4ab9
A
23#include "unicode/normalizer2.h"
24#include "unicode/unistr.h"
25#include "unicode/unorm.h"
729e4ab9
A
26#include "cstring.h"
27#include "mutex.h"
b331163b 28#include "norm2allmodes.h"
729e4ab9 29#include "normalizer2impl.h"
57a6839d 30#include "uassert.h"
729e4ab9 31#include "ucln_cmn.h"
b331163b
A
32
33using icu::Normalizer2Impl;
34
35// NFC/NFD data machine-generated by gennorm2 --csource
2ca993e8 36#define INCLUDED_FROM_NORMALIZER2_CPP
b331163b 37#include "norm2_nfc_data.h"
729e4ab9
A
38
39U_NAMESPACE_BEGIN
40
41// Public API dispatch via Normalizer2 subclasses -------------------------- ***
42
4388f060
A
43Normalizer2::~Normalizer2() {}
44
45UBool
46Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
47 return FALSE;
48}
49
50UChar32
51Normalizer2::composePair(UChar32, UChar32) const {
52 return U_SENTINEL;
53}
54
55uint8_t
56Normalizer2::getCombiningClass(UChar32 /*c*/) const {
57 return 0;
58}
59
729e4ab9
A
60// Normalizer2 implementation for the old UNORM_NONE.
61class NoopNormalizer2 : public Normalizer2 {
4388f060
A
62 virtual ~NoopNormalizer2();
63
729e4ab9
A
64 virtual UnicodeString &
65 normalize(const UnicodeString &src,
66 UnicodeString &dest,
67 UErrorCode &errorCode) const {
68 if(U_SUCCESS(errorCode)) {
69 if(&dest!=&src) {
70 dest=src;
71 } else {
72 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
73 }
74 }
75 return dest;
76 }
77 virtual UnicodeString &
78 normalizeSecondAndAppend(UnicodeString &first,
79 const UnicodeString &second,
80 UErrorCode &errorCode) const {
81 if(U_SUCCESS(errorCode)) {
82 if(&first!=&second) {
83 first.append(second);
84 } else {
85 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
86 }
87 }
88 return first;
89 }
90 virtual UnicodeString &
91 append(UnicodeString &first,
92 const UnicodeString &second,
93 UErrorCode &errorCode) const {
94 if(U_SUCCESS(errorCode)) {
95 if(&first!=&second) {
96 first.append(second);
97 } else {
98 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
99 }
100 }
101 return first;
102 }
103 virtual UBool
104 getDecomposition(UChar32, UnicodeString &) const {
105 return FALSE;
106 }
4388f060 107 // No need to override the default getRawDecomposition().
729e4ab9
A
108 virtual UBool
109 isNormalized(const UnicodeString &, UErrorCode &) const {
110 return TRUE;
111 }
112 virtual UNormalizationCheckResult
113 quickCheck(const UnicodeString &, UErrorCode &) const {
114 return UNORM_YES;
115 }
116 virtual int32_t
117 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
118 return s.length();
119 }
120 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
121 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
122 virtual UBool isInert(UChar32) const { return TRUE; }
123};
124
4388f060
A
125NoopNormalizer2::~NoopNormalizer2() {}
126
4388f060
A
127Normalizer2WithImpl::~Normalizer2WithImpl() {}
128
4388f060
A
129DecomposeNormalizer2::~DecomposeNormalizer2() {}
130
4388f060
A
131ComposeNormalizer2::~ComposeNormalizer2() {}
132
4388f060
A
133FCDNormalizer2::~FCDNormalizer2() {}
134
729e4ab9
A
135// instance cache ---------------------------------------------------------- ***
136
b331163b
A
137Norm2AllModes::~Norm2AllModes() {
138 delete impl;
139}
729e4ab9
A
140
141Norm2AllModes *
b331163b 142Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) {
729e4ab9 143 if(U_FAILURE(errorCode)) {
b331163b 144 delete impl;
729e4ab9
A
145 return NULL;
146 }
b331163b
A
147 Norm2AllModes *allModes=new Norm2AllModes(impl);
148 if(allModes==NULL) {
729e4ab9 149 errorCode=U_MEMORY_ALLOCATION_ERROR;
b331163b 150 delete impl;
729e4ab9
A
151 return NULL;
152 }
b331163b
A
153 return allModes;
154}
155
156Norm2AllModes *
157Norm2AllModes::createNFCInstance(UErrorCode &errorCode) {
158 if(U_FAILURE(errorCode)) {
159 return NULL;
160 }
161 Normalizer2Impl *impl=new Normalizer2Impl;
162 if(impl==NULL) {
163 errorCode=U_MEMORY_ALLOCATION_ERROR;
164 return NULL;
165 }
166 impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie,
167 norm2_nfc_data_extraData, norm2_nfc_data_smallFCD);
168 return createInstance(impl, errorCode);
729e4ab9
A
169}
170
171U_CDECL_BEGIN
172static UBool U_CALLCONV uprv_normalizer2_cleanup();
173U_CDECL_END
174
57a6839d 175static Norm2AllModes *nfcSingleton;
57a6839d 176static Normalizer2 *noopSingleton;
57a6839d
A
177
178static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
57a6839d
A
179static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
180
b331163b
A
181// UInitOnce singleton initialization functions
182static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) {
183 nfcSingleton=Norm2AllModes::createNFCInstance(errorCode);
184 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
185}
186
187static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) {
188 if(U_FAILURE(errorCode)) {
189 return;
190 }
191 noopSingleton=new NoopNormalizer2;
192 if(noopSingleton==NULL) {
193 errorCode=U_MEMORY_ALLOCATION_ERROR;
194 return;
57a6839d
A
195 }
196 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
197}
729e4ab9
A
198
199U_CDECL_BEGIN
200
729e4ab9 201static UBool U_CALLCONV uprv_normalizer2_cleanup() {
57a6839d
A
202 delete nfcSingleton;
203 nfcSingleton = NULL;
57a6839d
A
204 delete noopSingleton;
205 noopSingleton = NULL;
57a6839d 206 nfcInitOnce.reset();
57a6839d 207 noopInitOnce.reset();
729e4ab9
A
208 return TRUE;
209}
210
211U_CDECL_END
212
b331163b
A
213const Norm2AllModes *
214Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
215 if(U_FAILURE(errorCode)) { return NULL; }
216 umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode);
217 return nfcSingleton;
729e4ab9
A
218}
219
b331163b
A
220const Normalizer2 *
221Normalizer2::getNFCInstance(UErrorCode &errorCode) {
222 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
223 return allModes!=NULL ? &allModes->comp : NULL;
729e4ab9
A
224}
225
b331163b
A
226const Normalizer2 *
227Normalizer2::getNFDInstance(UErrorCode &errorCode) {
228 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
229 return allModes!=NULL ? &allModes->decomp : NULL;
729e4ab9
A
230}
231
b331163b
A
232const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
233 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
234 return allModes!=NULL ? &allModes->fcd : NULL;
729e4ab9
A
235}
236
b331163b
A
237const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
238 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
239 return allModes!=NULL ? &allModes->fcc : NULL;
729e4ab9
A
240}
241
242const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
b331163b
A
243 if(U_FAILURE(errorCode)) { return NULL; }
244 umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode);
57a6839d 245 return noopSingleton;
729e4ab9
A
246}
247
729e4ab9
A
248const Normalizer2Impl *
249Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
b331163b
A
250 const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
251 return allModes!=NULL ? allModes->impl : NULL;
729e4ab9
A
252}
253
254const Normalizer2Impl *
255Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
256 return &((Normalizer2WithImpl *)norm2)->impl;
257}
258
729e4ab9
A
259U_NAMESPACE_END
260
261// C API ------------------------------------------------------------------- ***
262
263U_NAMESPACE_USE
264
51004dcb 265U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
266unorm2_getNFCInstance(UErrorCode *pErrorCode) {
267 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
268}
269
51004dcb 270U_CAPI const UNormalizer2 * U_EXPORT2
4388f060
A
271unorm2_getNFDInstance(UErrorCode *pErrorCode) {
272 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
273}
274
51004dcb 275U_CAPI void U_EXPORT2
729e4ab9
A
276unorm2_close(UNormalizer2 *norm2) {
277 delete (Normalizer2 *)norm2;
278}
279
51004dcb 280U_CAPI int32_t U_EXPORT2
729e4ab9
A
281unorm2_normalize(const UNormalizer2 *norm2,
282 const UChar *src, int32_t length,
283 UChar *dest, int32_t capacity,
284 UErrorCode *pErrorCode) {
285 if(U_FAILURE(*pErrorCode)) {
286 return 0;
287 }
288 if( (src==NULL ? length!=0 : length<-1) ||
289 (dest==NULL ? capacity!=0 : capacity<0) ||
290 (src==dest && src!=NULL)
291 ) {
292 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
293 return 0;
294 }
295 UnicodeString destString(dest, 0, capacity);
296 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
297 if(length!=0) {
298 const Normalizer2 *n2=(const Normalizer2 *)norm2;
299 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
300 if(n2wi!=NULL) {
301 // Avoid duplicate argument checking and support NUL-terminated src.
302 ReorderingBuffer buffer(n2wi->impl, destString);
303 if(buffer.init(length, *pErrorCode)) {
304 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
305 }
306 } else {
307 UnicodeString srcString(length<0, src, length);
308 n2->normalize(srcString, destString, *pErrorCode);
309 }
310 }
311 return destString.extract(dest, capacity, *pErrorCode);
312}
313
314static int32_t
315normalizeSecondAndAppend(const UNormalizer2 *norm2,
316 UChar *first, int32_t firstLength, int32_t firstCapacity,
317 const UChar *second, int32_t secondLength,
318 UBool doNormalize,
319 UErrorCode *pErrorCode) {
320 if(U_FAILURE(*pErrorCode)) {
321 return 0;
322 }
323 if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
324 (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
325 (firstCapacity<0 || firstLength<-1)) ||
326 (first==second && first!=NULL)
327 ) {
328 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
329 return 0;
330 }
331 UnicodeString firstString(first, firstLength, firstCapacity);
4388f060 332 firstLength=firstString.length(); // In case it was -1.
729e4ab9
A
333 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
334 if(secondLength!=0) {
335 const Normalizer2 *n2=(const Normalizer2 *)norm2;
336 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
337 if(n2wi!=NULL) {
338 // Avoid duplicate argument checking and support NUL-terminated src.
4388f060
A
339 UnicodeString safeMiddle;
340 {
341 ReorderingBuffer buffer(n2wi->impl, firstString);
342 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1
343 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
344 doNormalize, safeMiddle, buffer, *pErrorCode);
345 }
346 } // The ReorderingBuffer destructor finalizes firstString.
347 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
348 // Restore the modified suffix of the first string.
349 // This does not restore first[] array contents between firstLength and firstCapacity.
350 // (That might be uninitialized memory, as far as we know.)
351 if(first!=NULL) { /* don't dereference NULL */
352 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
353 if(firstLength<firstCapacity) {
354 first[firstLength]=0; // NUL-terminate in case it was originally.
355 }
356 }
729e4ab9
A
357 }
358 } else {
359 UnicodeString secondString(secondLength<0, second, secondLength);
360 if(doNormalize) {
361 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
362 } else {
363 n2->append(firstString, secondString, *pErrorCode);
364 }
365 }
366 }
367 return firstString.extract(first, firstCapacity, *pErrorCode);
368}
369
51004dcb 370U_CAPI int32_t U_EXPORT2
729e4ab9
A
371unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
372 UChar *first, int32_t firstLength, int32_t firstCapacity,
373 const UChar *second, int32_t secondLength,
374 UErrorCode *pErrorCode) {
375 return normalizeSecondAndAppend(norm2,
376 first, firstLength, firstCapacity,
377 second, secondLength,
378 TRUE, pErrorCode);
379}
380
51004dcb 381U_CAPI int32_t U_EXPORT2
729e4ab9
A
382unorm2_append(const UNormalizer2 *norm2,
383 UChar *first, int32_t firstLength, int32_t firstCapacity,
384 const UChar *second, int32_t secondLength,
385 UErrorCode *pErrorCode) {
386 return normalizeSecondAndAppend(norm2,
387 first, firstLength, firstCapacity,
388 second, secondLength,
389 FALSE, pErrorCode);
390}
391
51004dcb 392U_CAPI int32_t U_EXPORT2
729e4ab9
A
393unorm2_getDecomposition(const UNormalizer2 *norm2,
394 UChar32 c, UChar *decomposition, int32_t capacity,
395 UErrorCode *pErrorCode) {
396 if(U_FAILURE(*pErrorCode)) {
397 return 0;
398 }
399 if(decomposition==NULL ? capacity!=0 : capacity<0) {
400 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
401 return 0;
402 }
403 UnicodeString destString(decomposition, 0, capacity);
404 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
405 return destString.extract(decomposition, capacity, *pErrorCode);
406 } else {
407 return -1;
408 }
409}
410
51004dcb 411U_CAPI int32_t U_EXPORT2
4388f060
A
412unorm2_getRawDecomposition(const UNormalizer2 *norm2,
413 UChar32 c, UChar *decomposition, int32_t capacity,
414 UErrorCode *pErrorCode) {
415 if(U_FAILURE(*pErrorCode)) {
416 return 0;
417 }
418 if(decomposition==NULL ? capacity!=0 : capacity<0) {
419 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
420 return 0;
421 }
422 UnicodeString destString(decomposition, 0, capacity);
423 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
424 return destString.extract(decomposition, capacity, *pErrorCode);
425 } else {
426 return -1;
427 }
428}
429
51004dcb 430U_CAPI UChar32 U_EXPORT2
4388f060
A
431unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
432 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
433}
434
51004dcb 435U_CAPI uint8_t U_EXPORT2
4388f060
A
436unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
437 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
438}
439
51004dcb 440U_CAPI UBool U_EXPORT2
729e4ab9
A
441unorm2_isNormalized(const UNormalizer2 *norm2,
442 const UChar *s, int32_t length,
443 UErrorCode *pErrorCode) {
444 if(U_FAILURE(*pErrorCode)) {
445 return 0;
446 }
447 if((s==NULL && length!=0) || length<-1) {
448 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
449 return 0;
450 }
451 UnicodeString sString(length<0, s, length);
452 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
453}
454
51004dcb 455U_CAPI UNormalizationCheckResult U_EXPORT2
729e4ab9
A
456unorm2_quickCheck(const UNormalizer2 *norm2,
457 const UChar *s, int32_t length,
458 UErrorCode *pErrorCode) {
459 if(U_FAILURE(*pErrorCode)) {
460 return UNORM_NO;
461 }
462 if((s==NULL && length!=0) || length<-1) {
463 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
464 return UNORM_NO;
465 }
466 UnicodeString sString(length<0, s, length);
467 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
468}
469
51004dcb 470U_CAPI int32_t U_EXPORT2
729e4ab9
A
471unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
472 const UChar *s, int32_t length,
473 UErrorCode *pErrorCode) {
474 if(U_FAILURE(*pErrorCode)) {
475 return 0;
476 }
477 if((s==NULL && length!=0) || length<-1) {
478 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
479 return 0;
480 }
481 UnicodeString sString(length<0, s, length);
482 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
483}
484
51004dcb 485U_CAPI UBool U_EXPORT2
729e4ab9
A
486unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
487 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
488}
489
51004dcb 490U_CAPI UBool U_EXPORT2
729e4ab9
A
491unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
492 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
493}
494
51004dcb 495U_CAPI UBool U_EXPORT2
729e4ab9
A
496unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
497 return ((const Normalizer2 *)norm2)->isInert(c);
498}
499
500// Some properties APIs ---------------------------------------------------- ***
501
4388f060
A
502U_CAPI uint8_t U_EXPORT2
503u_getCombiningClass(UChar32 c) {
504 UErrorCode errorCode=U_ZERO_ERROR;
b331163b 505 const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode);
4388f060
A
506 if(U_SUCCESS(errorCode)) {
507 return nfd->getCombiningClass(c);
508 } else {
509 return 0;
510 }
511}
512
4388f060
A
513U_CFUNC uint16_t
514unorm_getFCD16(UChar32 c) {
515 UErrorCode errorCode=U_ZERO_ERROR;
516 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
517 if(U_SUCCESS(errorCode)) {
518 return impl->getFCD16(c);
729e4ab9 519 } else {
4388f060 520 return 0;
729e4ab9
A
521 }
522}
523
524#endif // !UCONFIG_NO_NORMALIZATION