2 ******************************************************************************
3 * Copyright (c) 1996-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
8 * Created by: Vladimir Weinstein 12052000
10 * Modification history :
12 * Date Name Description
13 * 02/01/01 synwee Added normalization quickcheck enum and method.
14 * 02/12/01 synwee Commented out quickcheck util api has been approved
15 * Added private method for doing FCD checks
16 * 02/23/01 synwee Modified quickcheck and checkFCE to run through
17 * string for codepoints < 0x300 for the normalization
19 * 05/25/01+ Markus Scherer total rewrite, implement all normalization here
20 * instead of just wrappers around normlzr.cpp,
21 * load unorm.dat, support Unicode 3.1 with
22 * supplementary code points, etc.
23 * 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code
26 #include "unicode/utypes.h"
28 #if !UCONFIG_NO_NORMALIZATION
30 #include "unicode/udata.h"
31 #include "unicode/ustring.h"
32 #include "unicode/uiter.h"
33 #include "unicode/unorm.h"
34 #include "unicode/unorm2.h"
35 #include "normalizer2impl.h"
42 /* quick check functions ---------------------------------------------------- */
44 U_CAPI UNormalizationCheckResult U_EXPORT2
45 unorm_quickCheck(const UChar
*src
,
47 UNormalizationMode mode
,
48 UErrorCode
*pErrorCode
) {
49 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
50 return unorm2_quickCheck((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
53 U_CAPI UNormalizationCheckResult U_EXPORT2
54 unorm_quickCheckWithOptions(const UChar
*src
, int32_t srcLength
,
55 UNormalizationMode mode
, int32_t options
,
56 UErrorCode
*pErrorCode
) {
57 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
58 if(options
&UNORM_UNICODE_3_2
) {
59 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*pErrorCode
));
60 return unorm2_quickCheck(
61 reinterpret_cast<const UNormalizer2
*>(static_cast<Normalizer2
*>(&fn2
)),
62 src
, srcLength
, pErrorCode
);
64 return unorm2_quickCheck((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
68 U_CAPI UBool U_EXPORT2
69 unorm_isNormalized(const UChar
*src
, int32_t srcLength
,
70 UNormalizationMode mode
,
71 UErrorCode
*pErrorCode
) {
72 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
73 return unorm2_isNormalized((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
76 U_CAPI UBool U_EXPORT2
77 unorm_isNormalizedWithOptions(const UChar
*src
, int32_t srcLength
,
78 UNormalizationMode mode
, int32_t options
,
79 UErrorCode
*pErrorCode
) {
80 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
81 if(options
&UNORM_UNICODE_3_2
) {
82 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*pErrorCode
));
83 return unorm2_isNormalized(
84 reinterpret_cast<const UNormalizer2
*>(static_cast<Normalizer2
*>(&fn2
)),
85 src
, srcLength
, pErrorCode
);
87 return unorm2_isNormalized((const UNormalizer2
*)n2
, src
, srcLength
, pErrorCode
);
91 /* normalize() API ---------------------------------------------------------- */
93 /** Public API for normalizing. */
94 U_CAPI
int32_t U_EXPORT2
95 unorm_normalize(const UChar
*src
, int32_t srcLength
,
96 UNormalizationMode mode
, int32_t options
,
97 UChar
*dest
, int32_t destCapacity
,
98 UErrorCode
*pErrorCode
) {
99 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
100 if(options
&UNORM_UNICODE_3_2
) {
101 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*pErrorCode
));
102 return unorm2_normalize(
103 reinterpret_cast<const UNormalizer2
*>(static_cast<Normalizer2
*>(&fn2
)),
104 src
, srcLength
, dest
, destCapacity
, pErrorCode
);
106 return unorm2_normalize((const UNormalizer2
*)n2
,
107 src
, srcLength
, dest
, destCapacity
, pErrorCode
);
112 /* iteration functions ------------------------------------------------------ */
115 _iterate(UCharIterator
*src
, UBool forward
,
116 UChar
*dest
, int32_t destCapacity
,
117 const Normalizer2
*n2
,
118 UBool doNormalize
, UBool
*pNeededToNormalize
,
119 UErrorCode
*pErrorCode
) {
120 if(U_FAILURE(*pErrorCode
)) {
123 if(destCapacity
<0 || (dest
==NULL
&& destCapacity
>0) || src
==NULL
) {
124 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
128 if(pNeededToNormalize
!=NULL
) {
129 *pNeededToNormalize
=FALSE
;
131 if(!(forward
? src
->hasNext(src
) : src
->hasPrevious(src
))) {
132 return u_terminateUChars(dest
, destCapacity
, 0, pErrorCode
);
135 UnicodeString buffer
;
138 /* get one character and ignore its properties */
139 buffer
.append(uiter_next32(src
));
140 /* get all following characters until we see a boundary */
141 while((c
=uiter_next32(src
))>=0) {
142 if(n2
->hasBoundaryBefore(c
)) {
143 /* back out the latest movement to stop at the boundary */
144 src
->move(src
, -U16_LENGTH(c
), UITER_CURRENT
);
151 while((c
=uiter_previous32(src
))>=0) {
152 /* always write this character to the front of the buffer */
154 /* stop if this just-copied character is a boundary */
155 if(n2
->hasBoundaryBefore(c
)) {
161 UnicodeString
destString(dest
, 0, destCapacity
);
162 if(buffer
.length()>0 && doNormalize
) {
163 n2
->normalize(buffer
, destString
, *pErrorCode
).extract(dest
, destCapacity
, *pErrorCode
);
164 if(pNeededToNormalize
!=NULL
&& U_SUCCESS(*pErrorCode
)) {
165 *pNeededToNormalize
= destString
!=buffer
;
167 return destString
.length();
169 /* just copy the source characters */
170 return buffer
.extract(dest
, destCapacity
, *pErrorCode
);
175 unorm_iterate(UCharIterator
*src
, UBool forward
,
176 UChar
*dest
, int32_t destCapacity
,
177 UNormalizationMode mode
, int32_t options
,
178 UBool doNormalize
, UBool
*pNeededToNormalize
,
179 UErrorCode
*pErrorCode
) {
180 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
181 if(options
&UNORM_UNICODE_3_2
) {
182 const UnicodeSet
*uni32
= uniset_getUnicode32Instance(*pErrorCode
);
183 if(U_FAILURE(*pErrorCode
)) {
186 FilteredNormalizer2
fn2(*n2
, *uni32
);
187 return _iterate(src
, forward
, dest
, destCapacity
,
188 &fn2
, doNormalize
, pNeededToNormalize
, pErrorCode
);
190 return _iterate(src
, forward
, dest
, destCapacity
,
191 n2
, doNormalize
, pNeededToNormalize
, pErrorCode
);
194 U_CAPI
int32_t U_EXPORT2
195 unorm_previous(UCharIterator
*src
,
196 UChar
*dest
, int32_t destCapacity
,
197 UNormalizationMode mode
, int32_t options
,
198 UBool doNormalize
, UBool
*pNeededToNormalize
,
199 UErrorCode
*pErrorCode
) {
200 return unorm_iterate(src
, FALSE
,
203 doNormalize
, pNeededToNormalize
,
207 U_CAPI
int32_t U_EXPORT2
208 unorm_next(UCharIterator
*src
,
209 UChar
*dest
, int32_t destCapacity
,
210 UNormalizationMode mode
, int32_t options
,
211 UBool doNormalize
, UBool
*pNeededToNormalize
,
212 UErrorCode
*pErrorCode
) {
213 return unorm_iterate(src
, TRUE
,
216 doNormalize
, pNeededToNormalize
,
220 /* Concatenation of normalized strings -------------------------------------- */
223 _concatenate(const UChar
*left
, int32_t leftLength
,
224 const UChar
*right
, int32_t rightLength
,
225 UChar
*dest
, int32_t destCapacity
,
226 const Normalizer2
*n2
,
227 UErrorCode
*pErrorCode
) {
228 if(U_FAILURE(*pErrorCode
)) {
231 if(destCapacity
<0 || (dest
==NULL
&& destCapacity
>0) ||
232 left
==NULL
|| leftLength
<-1 || right
==NULL
|| rightLength
<-1) {
233 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
237 /* check for overlapping right and destination */
239 ((right
>=dest
&& right
<(dest
+destCapacity
)) ||
240 (rightLength
>0 && dest
>=right
&& dest
<(right
+rightLength
)))
242 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
246 /* allow left==dest */
247 UnicodeString destString
;
249 destString
.setTo(dest
, leftLength
, destCapacity
);
251 destString
.setTo(dest
, 0, destCapacity
);
252 destString
.append(left
, leftLength
);
254 return n2
->append(destString
, UnicodeString(rightLength
<0, right
, rightLength
), *pErrorCode
).
255 extract(dest
, destCapacity
, *pErrorCode
);
258 U_CAPI
int32_t U_EXPORT2
259 unorm_concatenate(const UChar
*left
, int32_t leftLength
,
260 const UChar
*right
, int32_t rightLength
,
261 UChar
*dest
, int32_t destCapacity
,
262 UNormalizationMode mode
, int32_t options
,
263 UErrorCode
*pErrorCode
) {
264 const Normalizer2
*n2
=Normalizer2Factory::getInstance(mode
, *pErrorCode
);
265 if(options
&UNORM_UNICODE_3_2
) {
266 const UnicodeSet
*uni32
= uniset_getUnicode32Instance(*pErrorCode
);
267 if(U_FAILURE(*pErrorCode
)) {
270 FilteredNormalizer2
fn2(*n2
, *uni32
);
271 return _concatenate(left
, leftLength
, right
, rightLength
,
272 dest
, destCapacity
, &fn2
, pErrorCode
);
274 return _concatenate(left
, leftLength
, right
, rightLength
,
275 dest
, destCapacity
, n2
, pErrorCode
);
278 #endif /* #if !UCONFIG_NO_NORMALIZATION */