]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/strrepl.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2002-2012, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 01/21/2002 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
17 #include "unicode/uniset.h"
18 #include "unicode/utf16.h"
25 UnicodeReplacer::~UnicodeReplacer() {}
26 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer
)
29 * Construct a StringReplacer that emits the given output
30 * text and sets the cursor to the given position.
31 * @param theOutput text that will replace input text when the
32 * replace() method is called. May contain stand-in characters
33 * that represent nested replacers.
34 * @param theCursorPos cursor position that will be returned by
35 * the replace() method
36 * @param theData transliterator context object that translates
37 * stand-in characters to UnicodeReplacer objects
39 StringReplacer::StringReplacer(const UnicodeString
& theOutput
,
41 const TransliterationRuleData
* theData
) {
43 cursorPos
= theCursorPos
;
50 * Construct a StringReplacer that emits the given output
51 * text and does not modify the cursor.
52 * @param theOutput text that will replace input text when the
53 * replace() method is called. May contain stand-in characters
54 * that represent nested replacers.
55 * @param theData transliterator context object that translates
56 * stand-in characters to UnicodeReplacer objects
58 StringReplacer::StringReplacer(const UnicodeString
& theOutput
,
59 const TransliterationRuleData
* theData
) {
70 StringReplacer::StringReplacer(const StringReplacer
& other
) :
71 UnicodeFunctor(other
),
72 UnicodeReplacer(other
)
74 output
= other
.output
;
75 cursorPos
= other
.cursorPos
;
76 hasCursor
= other
.hasCursor
;
78 isComplex
= other
.isComplex
;
84 StringReplacer::~StringReplacer() {
88 * Implement UnicodeFunctor
90 UnicodeFunctor
* StringReplacer::clone() const {
91 return new StringReplacer(*this);
95 * Implement UnicodeFunctor
97 UnicodeReplacer
* StringReplacer::toReplacer() const {
98 return const_cast<StringReplacer
*>(this);
102 * UnicodeReplacer API
104 int32_t StringReplacer::replace(Replaceable
& text
,
109 int32_t newStart
= 0;
111 // NOTE: It should be possible to _always_ run the complex
112 // processing code; just slower. If not, then there is a bug
113 // in the complex processing code.
115 // Simple (no nested replacers) Processing Code :
117 text
.handleReplaceBetween(start
, limit
, output
);
118 outLen
= output
.length();
120 // Setup default cursor position (for cursorPos within output)
121 newStart
= cursorPos
;
124 // Complex (nested replacers) Processing Code :
126 /* When there are segments to be copied, use the Replaceable.copy()
127 * API in order to retain out-of-band data. Copy everything to the
128 * end of the string, then copy them back over the key. This preserves
129 * the integrity of indices into the key and surrounding context while
130 * generating the output text.
133 int32_t oOutput
; // offset into 'output'
136 // The temporary buffer starts at tempStart, and extends
137 // to destLimit. The start of the buffer has a single
138 // character from before the key. This provides style
139 // data when additional characters are filled into the
140 // temporary buffer. If there is nothing to the left, use
141 // the non-character U+FFFF, which Replaceable subclasses
142 // should treat specially as a "no-style character."
143 // destStart points to the point after the style context
144 // character, so it is tempStart+1 or tempStart+2.
145 int32_t tempStart
= text
.length(); // start of temp buffer
146 int32_t destStart
= tempStart
; // copy new text to here
148 int32_t len
= U16_LENGTH(text
.char32At(start
-1));
149 text
.copy(start
-len
, start
, tempStart
);
152 UnicodeString
str((UChar
) 0xFFFF);
153 text
.handleReplaceBetween(tempStart
, tempStart
, str
);
156 int32_t destLimit
= destStart
;
158 for (oOutput
=0; oOutput
<output
.length(); ) {
159 if (oOutput
== cursorPos
) {
160 // Record the position of the cursor
161 newStart
= destLimit
- destStart
; // relative to start
163 UChar32 c
= output
.char32At(oOutput
);
164 UnicodeReplacer
* r
= data
->lookupReplacer(c
);
166 // Accumulate straight (non-segment) text.
171 // Insert any accumulated straight text.
172 if (buf
.length() > 0) {
173 text
.handleReplaceBetween(destLimit
, destLimit
, buf
);
174 destLimit
+= buf
.length();
178 // Delegate output generation to replacer object
179 int32_t len
= r
->replace(text
, destLimit
, destLimit
, cursor
);
182 oOutput
+= U16_LENGTH(c
);
184 // Insert any accumulated straight text.
185 if (buf
.length() > 0) {
186 text
.handleReplaceBetween(destLimit
, destLimit
, buf
);
187 destLimit
+= buf
.length();
189 if (oOutput
== cursorPos
) {
190 // Record the position of the cursor
191 newStart
= destLimit
- destStart
; // relative to start
194 outLen
= destLimit
- destStart
;
196 // Copy new text to start, and delete it
197 text
.copy(destStart
, destLimit
, start
);
198 text
.handleReplaceBetween(tempStart
+ outLen
, destLimit
+ outLen
, UnicodeString());
200 // Delete the old text (the key)
201 text
.handleReplaceBetween(start
+ outLen
, limit
+ outLen
, UnicodeString());
205 // Adjust the cursor for positions outside the key. These
206 // refer to code points rather than code units. If cursorPos
207 // is within the output string, then use newStart, which has
208 // already been set above.
211 int32_t n
= cursorPos
;
212 // Outside the output string, cursorPos counts code points
213 while (n
< 0 && newStart
> 0) {
214 newStart
-= U16_LENGTH(text
.char32At(newStart
-1));
218 } else if (cursorPos
> output
.length()) {
219 newStart
= start
+ outLen
;
220 int32_t n
= cursorPos
- output
.length();
221 // Outside the output string, cursorPos counts code points
222 while (n
> 0 && newStart
< text
.length()) {
223 newStart
+= U16_LENGTH(text
.char32At(newStart
));
228 // Cursor is within output string. It has been set up above
229 // to be relative to start.
240 * UnicodeReplacer API
242 UnicodeString
& StringReplacer::toReplacerPattern(UnicodeString
& rule
,
243 UBool escapeUnprintable
) const {
245 UnicodeString quoteBuf
;
247 int32_t cursor
= cursorPos
;
249 // Handle a cursor preceding the output
250 if (hasCursor
&& cursor
< 0) {
251 while (cursor
++ < 0) {
252 ICU_Utility::appendToRule(rule
, (UChar
)0x0040 /*@*/, TRUE
, escapeUnprintable
, quoteBuf
);
254 // Fall through and append '|' below
257 for (int32_t i
=0; i
<output
.length(); ++i
) {
258 if (hasCursor
&& i
== cursor
) {
259 ICU_Utility::appendToRule(rule
, (UChar
)0x007C /*|*/, TRUE
, escapeUnprintable
, quoteBuf
);
261 UChar c
= output
.charAt(i
); // Ok to use 16-bits here
263 UnicodeReplacer
* r
= data
->lookupReplacer(c
);
265 ICU_Utility::appendToRule(rule
, c
, FALSE
, escapeUnprintable
, quoteBuf
);
268 r
->toReplacerPattern(buf
, escapeUnprintable
);
269 buf
.insert(0, (UChar
)0x20);
270 buf
.append((UChar
)0x20);
271 ICU_Utility::appendToRule(rule
, buf
,
272 TRUE
, escapeUnprintable
, quoteBuf
);
276 // Handle a cursor after the output. Use > rather than >= because
277 // if cursor == output.length() it is at the end of the output,
278 // which is the default position, so we need not emit it.
279 if (hasCursor
&& cursor
> output
.length()) {
280 cursor
-= output
.length();
281 while (cursor
-- > 0) {
282 ICU_Utility::appendToRule(rule
, (UChar
)0x0040 /*@*/, TRUE
, escapeUnprintable
, quoteBuf
);
284 ICU_Utility::appendToRule(rule
, (UChar
)0x007C /*|*/, TRUE
, escapeUnprintable
, quoteBuf
);
286 // Flush quoteBuf out to result
287 ICU_Utility::appendToRule(rule
, -1,
288 TRUE
, escapeUnprintable
, quoteBuf
);
294 * Implement UnicodeReplacer
296 void StringReplacer::addReplacementSetTo(UnicodeSet
& toUnionTo
) const {
298 for (int32_t i
=0; i
<output
.length(); i
+=U16_LENGTH(ch
)) {
299 ch
= output
.char32At(i
);
300 UnicodeReplacer
* r
= data
->lookupReplacer(ch
);
304 r
->addReplacementSetTo(toUnionTo
);
312 void StringReplacer::setData(const TransliterationRuleData
* d
) {
315 while (i
<output
.length()) {
316 UChar32 c
= output
.char32At(i
);
317 UnicodeFunctor
* f
= data
->lookup(c
);
327 #endif /* #if !UCONFIG_NO_TRANSLITERATION */