/*
******************************************************************************
* Copyright (c) 1996-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
*
* Created by: Vladimir Weinstein 12052000
*
* Modification history :
*
* Date        Name        Description
* 02/01/01    synwee      Added normalization quickcheck enum and method.
* 02/12/01    synwee      Commented out quickcheck util api has been approved
*                         Added private method for doing FCD checks
* 02/23/01    synwee      Modified quickcheck and checkFCE to run through
*                         string for codepoints < 0x300 for the normalization
* 05/25/01+   Markus Scherer total rewrite, implement all normalization here
*                         instead of just wrappers around normlzr.cpp,
*                         load unorm.dat, support Unicode 3.1 with
*                         supplementary code points, etc.
* 2009-nov..2010-jan  Markus Scherer  total rewrite, new Normalizer2 API & code
*/
26 #include "unicode/utypes.h"
28 #if !UCONFIG_NO_NORMALIZATION
30 #include "unicode/udata.h"
31 #include "unicode/ustring.h"
32 #include "unicode/uiter.h"
33 #include "unicode/unorm.h"
34 #include "unicode/unorm2.h"
35 #include "normalizer2impl.h"
/* Number of elements in a true array (not a pointer), as an int32_t. */
/* The whole expansion is parenthesized so it composes safely inside larger expressions. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
/* quick check functions ---------------------------------------------------- */
46 U_CAPI UNormalizationCheckResult U_EXPORT2
47 unorm_quickCheck(const UChar
*src
,
49 UNormalizationMode mode
,
50 UErrorCode
*pErrorCode
) {
51 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
52 return unorm2_quickCheck((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
55 U_CAPI UNormalizationCheckResult U_EXPORT2
56 unorm_quickCheckWithOptions(const UChar
*src
, int32_t srcLength
,
57 UNormalizationMode mode
, int32_t options
,
58 UErrorCode
*pErrorCode
) {
59 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
60 if(options
&UNORM_UNICODE_3_2
) {
61 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*pErrorCode
));
62 return unorm2_quickCheck(
63 reinterpret_cast<const UNormalizer2
*>(static_cast<Normalizer2
*>(&fn2
)),
64 src
, srcLength
, pErrorCode
);
66 return unorm2_quickCheck((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
70 U_CAPI UBool U_EXPORT2
71 unorm_isNormalized(const UChar
*src
, int32_t srcLength
,
72 UNormalizationMode mode
,
73 UErrorCode
*pErrorCode
) {
74 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
75 return unorm2_isNormalized((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
78 U_CAPI UBool U_EXPORT2
79 unorm_isNormalizedWithOptions(const UChar
*src
, int32_t srcLength
,
80 UNormalizationMode mode
, int32_t options
,
81 UErrorCode
*pErrorCode
) {
82 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
83 if(options
&UNORM_UNICODE_3_2
) {
84 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*pErrorCode
));
85 return unorm2_isNormalized(
86 reinterpret_cast<const UNormalizer2
*>(static_cast<Normalizer2
*>(&fn2
)),
87 src
, srcLength
, pErrorCode
);
89 return unorm2_isNormalized((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
/* normalize() API ---------------------------------------------------------- */
95 /** Public API for normalizing. */
96 U_CAPI
int32_t U_EXPORT2
97 unorm_normalize(const UChar
*src
, int32_t srcLength
,
98 UNormalizationMode mode
, int32_t options
,
99 UChar
*dest
, int32_t destCapacity
,
100 UErrorCode
*pErrorCode
) {
101 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
102 if(options
&UNORM_UNICODE_3_2
) {
103 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*pErrorCode
));
104 return unorm2_normalize(
105 reinterpret_cast<const UNormalizer2
*>(static_cast<Normalizer2
*>(&fn2
)),
106 src
, srcLength
, dest
, destCapacity
, pErrorCode
);
108 return unorm2_normalize((const UNormalizer2
*)n2
,
109 src
, srcLength
, dest
, destCapacity
, pErrorCode
);
/* iteration functions ------------------------------------------------------ */
117 _iterate(UCharIterator
*src
, UBool forward
,
118 UChar
*dest
, int32_t destCapacity
,
119 const Normalizer2
*n2
,
120 UBool doNormalize
, UBool
*pNeededToNormalize
,
121 UErrorCode
*pErrorCode
) {
122 if(U_FAILURE(*pErrorCode
)) {
125 if(destCapacity
<0 || (dest
==NULL
&& destCapacity
>0) || src
==NULL
) {
126 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
130 if(pNeededToNormalize
!=NULL
) {
131 *pNeededToNormalize
=FALSE
;
133 if(!(forward
? src
->hasNext(src
) : src
->hasPrevious(src
))) {
134 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
);
137 UnicodeString buffer
;
140 /* get one character and ignore its properties */
141 buffer
.append(uiter_next32(src
));
142 /* get all following characters until we see a boundary */
143 while((c
=uiter_next32(src
))>=0) {
144 if(n2
->hasBoundaryBefore(c
)) {
145 /* back out the latest movement to stop at the boundary */
146 src
->move(src
, -U16_LENGTH(c
), UITER_CURRENT
);
153 while((c
=uiter_previous32(src
))>=0) {
154 /* always write this character to the front of the buffer */
156 /* stop if this just-copied character is a boundary */
157 if(n2
->hasBoundaryBefore(c
)) {
163 UnicodeString
destString(dest
, 0, destCapacity
);
164 if(buffer
.length()>0 && doNormalize
) {
165 n2
->normalize(buffer
, destString
, *pErrorCode
).extract(dest
, destCapacity
, *pErrorCode
);
166 if(pNeededToNormalize
!=NULL
&& U_SUCCESS(*pErrorCode
)) {
167 *pNeededToNormalize
= destString
!=buffer
;
169 return destString
.length();
171 /* just copy the source characters */
172 return buffer
.extract(dest
, destCapacity
, *pErrorCode
);
177 unorm_iterate(UCharIterator
*src
, UBool forward
,
178 UChar
*dest
, int32_t destCapacity
,
179 UNormalizationMode mode
, int32_t options
,
180 UBool doNormalize
, UBool
*pNeededToNormalize
,
181 UErrorCode
*pErrorCode
) {
182 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
183 if(options
&UNORM_UNICODE_3_2
) {
184 const UnicodeSet
*uni32
= uniset_getUnicode32Instance(*pErrorCode
);
185 if(U_FAILURE(*pErrorCode
)) {
188 FilteredNormalizer2
fn2(*n2
, *uni32
);
189 return _iterate(src
, forward
, dest
, destCapacity
,
190 &fn2
, doNormalize
, pNeededToNormalize
, pErrorCode
);
192 return _iterate(src
, forward
, dest
, destCapacity
,
193 n2
, doNormalize
, pNeededToNormalize
, pErrorCode
);
196 U_CAPI
int32_t U_EXPORT2
197 unorm_previous(UCharIterator
*src
,
198 UChar
*dest
, int32_t destCapacity
,
199 UNormalizationMode mode
, int32_t options
,
200 UBool doNormalize
, UBool
*pNeededToNormalize
,
201 UErrorCode
*pErrorCode
) {
202 return unorm_iterate(src
, FALSE
,
205 doNormalize
, pNeededToNormalize
,
209 U_CAPI
int32_t U_EXPORT2
210 unorm_next(UCharIterator
*src
,
211 UChar
*dest
, int32_t destCapacity
,
212 UNormalizationMode mode
, int32_t options
,
213 UBool doNormalize
, UBool
*pNeededToNormalize
,
214 UErrorCode
*pErrorCode
) {
215 return unorm_iterate(src
, TRUE
,
218 doNormalize
, pNeededToNormalize
,
/* Concatenation of normalized strings -------------------------------------- */
225 _concatenate(const UChar
*left
, int32_t leftLength
,
226 const UChar
*right
, int32_t rightLength
,
227 UChar
*dest
, int32_t destCapacity
,
228 const Normalizer2
*n2
,
229 UErrorCode
*pErrorCode
) {
230 if(U_FAILURE(*pErrorCode
)) {
233 if(destCapacity
<0 || (dest
==NULL
&& destCapacity
>0) ||
234 left
==NULL
|| leftLength
<-1 || right
==NULL
|| rightLength
<-1) {
235 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
239 /* check for overlapping right and destination */
241 ((right
>=dest
&& right
<(dest
+destCapacity
)) ||
242 (rightLength
>0 && dest
>=right
&& dest
<(right
+rightLength
)))
244 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
248 /* allow left==dest */
249 UnicodeString destString
;
251 destString
.setTo(dest
, leftLength
, destCapacity
);
253 destString
.setTo(dest
, 0, destCapacity
);
254 destString
.append(left
, leftLength
);
256 return n2
->append(destString
, UnicodeString(rightLength
<0, right
, rightLength
), *pErrorCode
).
257 extract(dest
, destCapacity
, *pErrorCode
);
260 U_CAPI
int32_t U_EXPORT2
261 unorm_concatenate(const UChar
*left
, int32_t leftLength
,
262 const UChar
*right
, int32_t rightLength
,
263 UChar
*dest
, int32_t destCapacity
,
264 UNormalizationMode mode
, int32_t options
,
265 UErrorCode
*pErrorCode
) {
266 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
267 if(options
&UNORM_UNICODE_3_2
) {
268 const UnicodeSet
*uni32
= uniset_getUnicode32Instance(*pErrorCode
);
269 if(U_FAILURE(*pErrorCode
)) {
272 FilteredNormalizer2
fn2(*n2
, *uni32
);
273 return _concatenate(left
, leftLength
, right
, rightLength
,
274 dest
, destCapacity
, &fn2
, pErrorCode
);
276 return _concatenate(left
, leftLength
, right
, rightLength
,
277 dest
, destCapacity
, n2
, pErrorCode
);
280 #endif /* #if !UCONFIG_NO_NORMALIZATION */