]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/strrepl.cpp
2 **********************************************************************
3 * Copyright (c) 2002-2004, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 01/21/2002 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
18 #include "unicode/uniset.h"
// Zero-length UChar string: the array holds only the terminating 0.
// Passed to handleReplaceBetween() further down as the replacement text
// when a range is being deleted.
22 static const UChar EMPTY
[] = { 0 }; // empty string: ""
// Out-of-line definition of the UnicodeReplacer destructor; the body is empty.
24 UnicodeReplacer::~UnicodeReplacer() {}
// NOTE(review): presumably expands to StringReplacer's ICU class-ID (RTTI)
// boilerplate -- confirm against the macro definition in the UObject headers.
25 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer
)
28 * Construct a StringReplacer that emits the given output
29 * text and sets the cursor to the given position.
30 * @param theOutput text that will replace input text when the
31 * replace() method is called. May contain stand-in characters
32 * that represent nested replacers.
33 * @param theCursorPos cursor position that will be returned by
34 * the replace() method
35 * @param theData transliterator context object that translates
36 * stand-in characters to UnicodeReplacer objects
38 StringReplacer::StringReplacer(const UnicodeString
& theOutput
,
40 const TransliterationRuleData
* theData
) {
42 cursorPos
= theCursorPos
;
49 * Construct a StringReplacer that emits the given output
50 * text and does not modify the cursor.
51 * @param theOutput text that will replace input text when the
52 * replace() method is called. May contain stand-in characters
53 * that represent nested replacers.
54 * @param theData transliterator context object that translates
55 * stand-in characters to UnicodeReplacer objects
57 StringReplacer::StringReplacer(const UnicodeString
& theOutput
,
58 const TransliterationRuleData
* theData
) {
69 StringReplacer::StringReplacer(const StringReplacer
& other
) :
70 UnicodeFunctor(other
),
71 UnicodeReplacer(other
)
73 output
= other
.output
;
74 cursorPos
= other
.cursorPos
;
75 hasCursor
= other
.hasCursor
;
77 isComplex
= other
.isComplex
;
83 StringReplacer::~StringReplacer() {
87 * Implement UnicodeFunctor
// Returns a newly allocated copy of this StringReplacer, built with the
// copy constructor and handed back through a UnicodeFunctor pointer.
// NOTE(review): the object comes from `new`, so the caller presumably
// takes ownership and must delete it -- confirm against the callers.
89 UnicodeFunctor
* StringReplacer::clone() const {
90 return new StringReplacer(*this);
94 * Implement UnicodeFunctor
// Exposes this object through its UnicodeReplacer interface.
// The method is const, so the C-style cast below also removes the const
// qualification of `this`; no new object is created.
96 UnicodeReplacer
* StringReplacer::toReplacer() const {
97 return (UnicodeReplacer
*) this;
101 * UnicodeReplacer API
103 int32_t StringReplacer::replace(Replaceable
& text
,
108 int32_t newStart
= 0;
110 // NOTE: It should be possible to _always_ run the complex
111 // processing code; just slower. If not, then there is a bug
112 // in the complex processing code.
114 // Simple (no nested replacers) Processing Code :
116 text
.handleReplaceBetween(start
, limit
, output
);
117 outLen
= output
.length();
119 // Setup default cursor position (for cursorPos within output)
120 newStart
= cursorPos
;
123 // Complex (nested replacers) Processing Code :
125 /* When there are segments to be copied, use the Replaceable.copy()
126 * API in order to retain out-of-band data. Copy everything to the
127 * end of the string, then copy them back over the key. This preserves
128 * the integrity of indices into the key and surrounding context while
129 * generating the output text.
132 int32_t oOutput
; // offset into 'output'
135 // The temporary buffer starts at tempStart, and extends
136 // to destLimit. The start of the buffer has a single
137 // character from before the key. This provides style
138 // data when additional characters are filled into the
139 // temporary buffer. If there is nothing to the left, use
140 // the non-character U+FFFF, which Replaceable subclasses
141 // should treat specially as a "no-style character."
142 // destStart points to the point after the style context
143 // character, so it is tempStart+1 or tempStart+2.
144 int32_t tempStart
= text
.length(); // start of temp buffer
145 int32_t destStart
= tempStart
; // copy new text to here
147 int32_t len
= UTF_CHAR_LENGTH(text
.char32At(start
-1));
148 text
.copy(start
-len
, start
, tempStart
);
151 UnicodeString
str((UChar
) 0xFFFF);
152 text
.handleReplaceBetween(tempStart
, tempStart
, str
);
155 int32_t destLimit
= destStart
;
157 for (oOutput
=0; oOutput
<output
.length(); ) {
158 if (oOutput
== cursorPos
) {
159 // Record the position of the cursor
160 newStart
= destLimit
- destStart
; // relative to start
162 UChar32 c
= output
.char32At(oOutput
);
163 UnicodeReplacer
* r
= data
->lookupReplacer(c
);
165 // Accumulate straight (non-segment) text.
170 // Insert any accumulated straight text.
171 if (buf
.length() > 0) {
172 text
.handleReplaceBetween(destLimit
, destLimit
, buf
);
173 destLimit
+= buf
.length();
177 // Delegate output generation to replacer object
178 int32_t len
= r
->replace(text
, destLimit
, destLimit
, cursor
);
181 oOutput
+= UTF_CHAR_LENGTH(c
);
183 // Insert any accumulated straight text.
184 if (buf
.length() > 0) {
185 text
.handleReplaceBetween(destLimit
, destLimit
, buf
);
186 destLimit
+= buf
.length();
188 if (oOutput
== cursorPos
) {
189 // Record the position of the cursor
190 newStart
= destLimit
- destStart
; // relative to start
193 outLen
= destLimit
- destStart
;
195 // Copy new text to start, and delete it
196 text
.copy(destStart
, destLimit
, start
);
197 text
.handleReplaceBetween(tempStart
+ outLen
, destLimit
+ outLen
, EMPTY
);
199 // Delete the old text (the key)
200 text
.handleReplaceBetween(start
+ outLen
, limit
+ outLen
, EMPTY
);
204 // Adjust the cursor for positions outside the key. These
205 // refer to code points rather than code units. If cursorPos
206 // is within the output string, then use newStart, which has
207 // already been set above.
210 int32_t n
= cursorPos
;
211 // Outside the output string, cursorPos counts code points
212 while (n
< 0 && newStart
> 0) {
213 newStart
-= UTF_CHAR_LENGTH(text
.char32At(newStart
-1));
217 } else if (cursorPos
> output
.length()) {
218 newStart
= start
+ outLen
;
219 int32_t n
= cursorPos
- output
.length();
220 // Outside the output string, cursorPos counts code points
221 while (n
> 0 && newStart
< text
.length()) {
222 newStart
+= UTF_CHAR_LENGTH(text
.char32At(newStart
));
227 // Cursor is within output string. It has been set up above
228 // to be relative to start.
239 * UnicodeReplacer API
241 UnicodeString
& StringReplacer::toReplacerPattern(UnicodeString
& rule
,
242 UBool escapeUnprintable
) const {
244 UnicodeString quoteBuf
;
246 int32_t cursor
= cursorPos
;
248 // Handle a cursor preceding the output
249 if (hasCursor
&& cursor
< 0) {
250 while (cursor
++ < 0) {
251 ICU_Utility::appendToRule(rule
, (UChar
)0x0040 /*@*/, TRUE
, escapeUnprintable
, quoteBuf
);
253 // Fall through and append '|' below
256 for (int32_t i
=0; i
<output
.length(); ++i
) {
257 if (hasCursor
&& i
== cursor
) {
258 ICU_Utility::appendToRule(rule
, (UChar
)0x007C /*|*/, TRUE
, escapeUnprintable
, quoteBuf
);
260 UChar c
= output
.charAt(i
); // Ok to use 16-bits here
262 UnicodeReplacer
* r
= data
->lookupReplacer(c
);
264 ICU_Utility::appendToRule(rule
, c
, FALSE
, escapeUnprintable
, quoteBuf
);
267 r
->toReplacerPattern(buf
, escapeUnprintable
);
268 buf
.insert(0, (UChar
)0x20);
269 buf
.append((UChar
)0x20);
270 ICU_Utility::appendToRule(rule
, buf
,
271 TRUE
, escapeUnprintable
, quoteBuf
);
275 // Handle a cursor after the output. Use > rather than >= because
276 // if cursor == output.length() it is at the end of the output,
277 // which is the default position, so we need not emit it.
278 if (hasCursor
&& cursor
> output
.length()) {
279 cursor
-= output
.length();
280 while (cursor
-- > 0) {
281 ICU_Utility::appendToRule(rule
, (UChar
)0x0040 /*@*/, TRUE
, escapeUnprintable
, quoteBuf
);
283 ICU_Utility::appendToRule(rule
, (UChar
)0x007C /*|*/, TRUE
, escapeUnprintable
, quoteBuf
);
285 // Flush quoteBuf out to result
286 ICU_Utility::appendToRule(rule
, -1,
287 TRUE
, escapeUnprintable
, quoteBuf
);
293 * Implement UnicodeReplacer
295 void StringReplacer::addReplacementSetTo(UnicodeSet
& toUnionTo
) const {
297 for (int32_t i
=0; i
<output
.length(); i
+=UTF_CHAR_LENGTH(ch
)) {
298 ch
= output
.char32At(i
);
299 UnicodeReplacer
* r
= data
->lookupReplacer(ch
);
303 r
->addReplacementSetTo(toUnionTo
);
311 void StringReplacer::setData(const TransliterationRuleData
* d
) {
314 while (i
<output
.length()) {
315 UChar32 c
= output
.char32At(i
);
316 UnicodeFunctor
* f
= data
->lookup(c
);
320 i
+= UTF_CHAR_LENGTH(c
);
326 #endif /* #if !UCONFIG_NO_TRANSLITERATION */