]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/strrepl.cpp
2 **********************************************************************
3 * Copyright (c) 2002-2012, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 01/21/2002 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
15 #include "unicode/uniset.h"
16 #include "unicode/utf16.h"
23 UnicodeReplacer::~UnicodeReplacer() {}
24 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer
)
27 * Construct a StringReplacer that emits the given output
28 * text and sets the cursor to the given position.
29 * @param theOutput text that will replace input text when the
30 * replace() method is called. May contain stand-in characters
31 * that represent nested replacers.
32 * @param theCursorPos cursor position that will be returned by
33 * the replace() method
34 * @param theData transliterator context object that translates
35 * stand-in characters to UnicodeReplacer objects
37 StringReplacer::StringReplacer(const UnicodeString
& theOutput
,
39 const TransliterationRuleData
* theData
) {
41 cursorPos
= theCursorPos
;
48 * Construct a StringReplacer that emits the given output
49 * text and does not modify the cursor.
50 * @param theOutput text that will replace input text when the
51 * replace() method is called. May contain stand-in characters
52 * that represent nested replacers.
53 * @param theData transliterator context object that translates
54 * stand-in characters to UnicodeReplacer objects
56 StringReplacer::StringReplacer(const UnicodeString
& theOutput
,
57 const TransliterationRuleData
* theData
) {
68 StringReplacer::StringReplacer(const StringReplacer
& other
) :
69 UnicodeFunctor(other
),
70 UnicodeReplacer(other
)
72 output
= other
.output
;
73 cursorPos
= other
.cursorPos
;
74 hasCursor
= other
.hasCursor
;
76 isComplex
= other
.isComplex
;
82 StringReplacer::~StringReplacer() {
86 * Implement UnicodeFunctor
88 UnicodeFunctor
* StringReplacer::clone() const {
89 return new StringReplacer(*this);
93 * Implement UnicodeFunctor
95 UnicodeReplacer
* StringReplacer::toReplacer() const {
96 return const_cast<StringReplacer
*>(this);
100 * UnicodeReplacer API
102 int32_t StringReplacer::replace(Replaceable
& text
,
107 int32_t newStart
= 0;
109 // NOTE: It should be possible to _always_ run the complex
110 // processing code; just slower. If not, then there is a bug
111 // in the complex processing code.
113 // Simple (no nested replacers) Processing Code :
115 text
.handleReplaceBetween(start
, limit
, output
);
116 outLen
= output
.length();
118 // Setup default cursor position (for cursorPos within output)
119 newStart
= cursorPos
;
122 // Complex (nested replacers) Processing Code :
124 /* When there are segments to be copied, use the Replaceable.copy()
125 * API in order to retain out-of-band data. Copy everything to the
126 * end of the string, then copy them back over the key. This preserves
127 * the integrity of indices into the key and surrounding context while
128 * generating the output text.
131 int32_t oOutput
; // offset into 'output'
134 // The temporary buffer starts at tempStart, and extends
135 // to destLimit. The start of the buffer has a single
136 // character from before the key. This provides style
137 // data when additional characters are filled into the
138 // temporary buffer. If there is nothing to the left, use
139 // the non-character U+FFFF, which Replaceable subclasses
140 // should treat specially as a "no-style character."
141 // destStart points to the point after the style context
142 // character, so it is tempStart+1 or tempStart+2.
143 int32_t tempStart
= text
.length(); // start of temp buffer
144 int32_t destStart
= tempStart
; // copy new text to here
146 int32_t len
= U16_LENGTH(text
.char32At(start
-1));
147 text
.copy(start
-len
, start
, tempStart
);
150 UnicodeString
str((UChar
) 0xFFFF);
151 text
.handleReplaceBetween(tempStart
, tempStart
, str
);
154 int32_t destLimit
= destStart
;
156 for (oOutput
=0; oOutput
<output
.length(); ) {
157 if (oOutput
== cursorPos
) {
158 // Record the position of the cursor
159 newStart
= destLimit
- destStart
; // relative to start
161 UChar32 c
= output
.char32At(oOutput
);
162 UnicodeReplacer
* r
= data
->lookupReplacer(c
);
164 // Accumulate straight (non-segment) text.
169 // Insert any accumulated straight text.
170 if (buf
.length() > 0) {
171 text
.handleReplaceBetween(destLimit
, destLimit
, buf
);
172 destLimit
+= buf
.length();
176 // Delegate output generation to replacer object
177 int32_t len
= r
->replace(text
, destLimit
, destLimit
, cursor
);
180 oOutput
+= U16_LENGTH(c
);
182 // Insert any accumulated straight text.
183 if (buf
.length() > 0) {
184 text
.handleReplaceBetween(destLimit
, destLimit
, buf
);
185 destLimit
+= buf
.length();
187 if (oOutput
== cursorPos
) {
188 // Record the position of the cursor
189 newStart
= destLimit
- destStart
; // relative to start
192 outLen
= destLimit
- destStart
;
194 // Copy new text to start, and delete it
195 text
.copy(destStart
, destLimit
, start
);
196 text
.handleReplaceBetween(tempStart
+ outLen
, destLimit
+ outLen
, UnicodeString());
198 // Delete the old text (the key)
199 text
.handleReplaceBetween(start
+ outLen
, limit
+ outLen
, UnicodeString());
203 // Adjust the cursor for positions outside the key. These
204 // refer to code points rather than code units. If cursorPos
205 // is within the output string, then use newStart, which has
206 // already been set above.
209 int32_t n
= cursorPos
;
210 // Outside the output string, cursorPos counts code points
211 while (n
< 0 && newStart
> 0) {
212 newStart
-= U16_LENGTH(text
.char32At(newStart
-1));
216 } else if (cursorPos
> output
.length()) {
217 newStart
= start
+ outLen
;
218 int32_t n
= cursorPos
- output
.length();
219 // Outside the output string, cursorPos counts code points
220 while (n
> 0 && newStart
< text
.length()) {
221 newStart
+= U16_LENGTH(text
.char32At(newStart
));
226 // Cursor is within output string. It has been set up above
227 // to be relative to start.
238 * UnicodeReplacer API
240 UnicodeString
& StringReplacer::toReplacerPattern(UnicodeString
& rule
,
241 UBool escapeUnprintable
) const {
243 UnicodeString quoteBuf
;
245 int32_t cursor
= cursorPos
;
247 // Handle a cursor preceding the output
248 if (hasCursor
&& cursor
< 0) {
249 while (cursor
++ < 0) {
250 ICU_Utility::appendToRule(rule
, (UChar
)0x0040 /*@*/, TRUE
, escapeUnprintable
, quoteBuf
);
252 // Fall through and append '|' below
255 for (int32_t i
=0; i
<output
.length(); ++i
) {
256 if (hasCursor
&& i
== cursor
) {
257 ICU_Utility::appendToRule(rule
, (UChar
)0x007C /*|*/, TRUE
, escapeUnprintable
, quoteBuf
);
259 UChar c
= output
.charAt(i
); // Ok to use 16-bits here
261 UnicodeReplacer
* r
= data
->lookupReplacer(c
);
263 ICU_Utility::appendToRule(rule
, c
, FALSE
, escapeUnprintable
, quoteBuf
);
266 r
->toReplacerPattern(buf
, escapeUnprintable
);
267 buf
.insert(0, (UChar
)0x20);
268 buf
.append((UChar
)0x20);
269 ICU_Utility::appendToRule(rule
, buf
,
270 TRUE
, escapeUnprintable
, quoteBuf
);
274 // Handle a cursor after the output. Use > rather than >= because
275 // if cursor == output.length() it is at the end of the output,
276 // which is the default position, so we need not emit it.
277 if (hasCursor
&& cursor
> output
.length()) {
278 cursor
-= output
.length();
279 while (cursor
-- > 0) {
280 ICU_Utility::appendToRule(rule
, (UChar
)0x0040 /*@*/, TRUE
, escapeUnprintable
, quoteBuf
);
282 ICU_Utility::appendToRule(rule
, (UChar
)0x007C /*|*/, TRUE
, escapeUnprintable
, quoteBuf
);
284 // Flush quoteBuf out to result
285 ICU_Utility::appendToRule(rule
, -1,
286 TRUE
, escapeUnprintable
, quoteBuf
);
292 * Implement UnicodeReplacer
294 void StringReplacer::addReplacementSetTo(UnicodeSet
& toUnionTo
) const {
296 for (int32_t i
=0; i
<output
.length(); i
+=U16_LENGTH(ch
)) {
297 ch
= output
.char32At(i
);
298 UnicodeReplacer
* r
= data
->lookupReplacer(ch
);
302 r
->addReplacementSetTo(toUnionTo
);
310 void StringReplacer::setData(const TransliterationRuleData
* d
) {
313 while (i
<output
.length()) {
314 UChar32 c
= output
.char32At(i
);
315 UnicodeFunctor
* f
= data
->lookup(c
);
325 #endif /* #if !UCONFIG_NO_TRANSLITERATION */