]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/nortrans.cpp
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / i18n / nortrans.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
46f4442e 3* Copyright (C) 2001-2007, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 07/03/01 aliu Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/uniset.h"
16#include "unicode/uiter.h"
17#include "nortrans.h"
18#include "unormimp.h"
b75a7d8f
A
19#include "ucln_in.h"
20
21U_NAMESPACE_BEGIN
22
// NOTE(review): presumably expands to the ICU class-ID (RTTI) boilerplate for
// NormalizationTransliterator; the macro is defined outside this file -- confirm there.
374ca955 23UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)
b75a7d8f
A
24
25/**
26 * System registration hook.
27 */
28void NormalizationTransliterator::registerIDs() {
29 UErrorCode errorCode = U_ZERO_ERROR;
30 if(!unorm_haveData(&errorCode)) {
31 return;
32 }
33
34 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
35 _create, integerToken(UNORM_NFC));
36 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
37 _create, integerToken(UNORM_NFKC));
38 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
39 _create, integerToken(UNORM_NFD));
40 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
41 _create, integerToken(UNORM_NFKD));
42 Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
43 UNICODE_STRING_SIMPLE("NFD"), TRUE);
44 Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
45 UNICODE_STRING_SIMPLE("NFKD"), TRUE);
46}
47
48/**
49 * Factory methods
50 */
51Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
52 Token context) {
53 return new NormalizationTransliterator(ID, (UNormalizationMode) context.integer, 0);
54}
55
56/**
57 * Constructs a transliterator.
58 */
59NormalizationTransliterator::NormalizationTransliterator(
60 const UnicodeString& id,
61 UNormalizationMode mode, int32_t opt) :
62 Transliterator(id, 0) {
63 fMode = mode;
64 options = opt;
65}
66
67/**
68 * Destructor.
69 */
70NormalizationTransliterator::~NormalizationTransliterator() {
71}
72
73/**
74 * Copy constructor.
75 */
76NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
77Transliterator(o) {
78 fMode = o.fMode;
79 options = o.options;
80}
81
82/**
83 * Assignment operator.
84 */
46f4442e 85/*NormalizationTransliterator& NormalizationTransliterator::operator=(const NormalizationTransliterator& o) {
b75a7d8f
A
86 Transliterator::operator=(o);
87 fMode = o.fMode;
88 options = o.options;
89 return *this;
46f4442e 90}*/
b75a7d8f
A
91
92/**
93 * Transliterator API.
94 */
95Transliterator* NormalizationTransliterator::clone(void) const {
96 return new NormalizationTransliterator(*this);
97}
98
99/**
100 * Implements {@link Transliterator#handleTransliterate}.
101 */
102void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
103 UBool isIncremental) const {
104 // start and limit of the input range
105 int32_t start = offsets.start;
106 int32_t limit = offsets.limit;
107 int32_t length, delta;
108
109 if(start >= limit) {
110 return;
111 }
112
113 // a C code unit iterator, implemented around the Replaceable
114 UCharIterator iter;
115 uiter_setReplaceable(&iter, &text);
116
117 // the output string and buffer pointer
118 UnicodeString output;
119 UChar *buffer;
120 UBool neededToNormalize;
121
122 UErrorCode errorCode;
123
124 /*
125 * Normalize as short chunks at a time as possible even in
126 * bulk mode, so that styled text is minimally disrupted.
127 * In incremental mode, a chunk that ends with offsets.limit
128 * must not be normalized.
129 *
130 * If it was known that the input text is not styled, then
131 * a bulk mode normalization could look like this:
132 *
133
134 UChar staticChars[256];
135 UnicodeString input;
136
137 length = limit - start;
138 input.setTo(staticChars, 0, sizeof(staticChars)/U_SIZEOF_UCHAR); // writable alias
139
140 _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
141 input.releaseBuffer(length);
142
143 UErrorCode status = U_ZERO_ERROR;
144 Normalizer::normalize(input, fMode, options, output, status);
145
146 text.handleReplaceBetween(start, limit, output);
147
148 int32_t delta = output.length() - length;
149 offsets.contextLimit += delta;
150 offsets.limit += delta;
151 offsets.start = limit + delta;
152
153 *
154 */
155 while(start < limit) {
156 // set the iterator limits for the remaining input range
157 // this is a moving target because of the replacements in the text object
158 iter.start = iter.index = start;
159 iter.limit = limit;
160
161 // incrementally normalize a small chunk of the input
162 buffer = output.getBuffer(-1);
163 errorCode = U_ZERO_ERROR;
164 length = unorm_next(&iter, buffer, output.getCapacity(),
165 fMode, 0,
166 TRUE, &neededToNormalize,
167 &errorCode);
73c04bcf 168 output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
b75a7d8f
A
169
170 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
171 // use a larger output string buffer and do it again from the start
172 iter.index = start;
173 buffer = output.getBuffer(length);
174 errorCode = U_ZERO_ERROR;
175 length = unorm_next(&iter, buffer, output.getCapacity(),
176 fMode, 0,
177 TRUE, &neededToNormalize,
178 &errorCode);
73c04bcf 179 output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
b75a7d8f
A
180 }
181
182 if(U_FAILURE(errorCode)) {
183 break;
184 }
185
186 limit = iter.index;
187 if(isIncremental && limit == iter.limit) {
188 // stop in incremental mode when we reach the input limit
189 // in case there are additional characters that could change the
190 // normalization result
191
192 // UNLESS all characters in the result of the normalization of
193 // the last run are in the skippable set
194 const UChar *s=output.getBuffer();
195 int32_t i=0, outLength=output.length();
196 UChar32 c;
197
198 while(i<outLength) {
199 U16_NEXT(s, i, outLength, c);
200 if(!unorm_isNFSkippable(c, fMode)) {
201 outLength=-1; // I wish C++ had labeled loops and break outer; ...
202 break;
203 }
204 }
205 if (outLength<0) {
206 break;
207 }
208 }
209
210 if(neededToNormalize) {
211 // replace the input chunk with its normalized form
212 text.handleReplaceBetween(start, limit, output);
213
214 // update all necessary indexes accordingly
215 delta = length - (limit - start); // length change in the text object
216 start = limit += delta; // the next chunk starts where this one ends, with adjustment
217 limit = offsets.limit += delta; // set the iteration limit to the adjusted end of the input range
218 offsets.contextLimit += delta;
219 } else {
220 // delta == 0
221 start = limit;
222 limit = offsets.limit;
223 }
224 }
225
226 offsets.start = start;
227}
228
229U_NAMESPACE_END
230
231#endif /* #if !UCONFIG_NO_TRANSLITERATION */