]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | // edits.h | |
5 | // created: 2016dec30 Markus W. Scherer | |
6 | ||
7 | #ifndef __EDITS_H__ | |
8 | #define __EDITS_H__ | |
9 | ||
10 | #include "unicode/utypes.h" | |
340931cb A |
11 | |
12 | #if U_SHOW_CPLUSPLUS_API | |
13 | ||
f3c0d7a5 A |
14 | #include "unicode/uobject.h" |
15 | ||
16 | /** | |
17 | * \file | |
18 | * \brief C++ API: C++ class Edits for low-level string transformations on styled text. | |
19 | */ | |
20 | ||
f3c0d7a5 A |
21 | U_NAMESPACE_BEGIN |
22 | ||
0f5d89e8 | 23 | class UnicodeString; |
f3c0d7a5 A |
24 | |
25 | /** | |
0f5d89e8 A |
26 | * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions |
27 | * in linear progression. Does not support moving/reordering of text. | |
28 | * | |
29 | * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to | |
3d1f044b A |
30 | * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and |
31 | * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity, | |
0f5d89e8 A |
32 | * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one |
33 | * mapping between code points in the source and destination strings. | |
34 | * | |
35 | * After all edits have been added, instances of this class should be considered immutable, and an | |
36 | * {@link Edits::Iterator} can be used for queries. | |
37 | * | |
38 | * There are four flavors of Edits::Iterator: | |
39 | * | |
40 | * <ul> | |
41 | * <li>{@link #getFineIterator()} retains full granularity of change edits. | |
42 | * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling | |
43 | * next() on the iterator, skips over no-change edits (unchanged regions). | |
44 | * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change | |
45 | * edits are automatically merged during the construction phase.) | |
46 | * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when | |
47 | * calling next() on the iterator, skips over no-change edits (unchanged regions). | |
48 | * </ul> | |
49 | * | |
50 | * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the | |
51 | * following fine edits: | |
52 | * <ul> | |
53 | * <li>abc ⇨ abc (no-change) | |
54 | * <li>ß ⇨ ss (change) | |
55 | * <li>D ⇨ d (change) | |
56 | * <li>e ⇨ e (no-change) | |
57 | * <li>F ⇨ f (change) | |
58 | * </ul> | |
59 | * and the following coarse edits (note how adjacent change edits get merged together): | |
60 | * <ul> | |
61 | * <li>abc ⇨ abc (no-change) | |
62 | * <li>ßD ⇨ ssd (change) | |
63 | * <li>e ⇨ e (no-change) | |
64 | * <li>F ⇨ f (change) | |
65 | * </ul> | |
66 | * | |
67 | * The "fine changes" and "coarse changes" iterators will step through only the change edits when their | |
3d1f044b A |
68 | * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when |
69 | * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()` | |
0f5d89e8 A |
70 | * methods are used to walk through the string. |
71 | * | |
3d1f044b | 72 | * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in |
0f5d89e8 | 73 | * UCharacterCaseTest.java. |
f3c0d7a5 A |
74 | * |
75 | * An Edits object tracks a separate UErrorCode, but ICU string transformation functions | |
76 | * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. | |
77 | * | |
0f5d89e8 | 78 | * @stable ICU 59 |
f3c0d7a5 A |
79 | */ |
80 | class U_COMMON_API Edits U_FINAL : public UMemory { | |
81 | public: | |
82 | /** | |
83 | * Constructs an empty object. | |
0f5d89e8 | 84 | * @stable ICU 59 |
f3c0d7a5 A |
85 | */ |
86 | Edits() : | |
0f5d89e8 A |
87 | array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), |
88 | errorCode_(U_ZERO_ERROR) {} | |
89 | /** | |
90 | * Copy constructor. | |
91 | * @param other source edits | |
3d1f044b | 92 | * @stable ICU 60 |
0f5d89e8 A |
93 | */ |
94 | Edits(const Edits &other) : | |
95 | array(stackArray), capacity(STACK_CAPACITY), length(other.length), | |
96 | delta(other.delta), numChanges(other.numChanges), | |
97 | errorCode_(other.errorCode_) { | |
98 | copyArray(other); | |
99 | } | |
100 | /** | |
101 | * Move constructor, might leave src empty. | |
102 | * This object will have the same contents that the source object had. | |
103 | * @param src source edits | |
3d1f044b | 104 | * @stable ICU 60 |
0f5d89e8 A |
105 | */ |
106 | Edits(Edits &&src) U_NOEXCEPT : | |
107 | array(stackArray), capacity(STACK_CAPACITY), length(src.length), | |
108 | delta(src.delta), numChanges(src.numChanges), | |
109 | errorCode_(src.errorCode_) { | |
110 | moveArray(src); | |
111 | } | |
112 | ||
f3c0d7a5 A |
113 | /** |
114 | * Destructor. | |
0f5d89e8 | 115 | * @stable ICU 59 |
f3c0d7a5 A |
116 | */ |
117 | ~Edits(); | |
118 | ||
0f5d89e8 A |
119 | /** |
120 | * Assignment operator. | |
121 | * @param other source edits | |
122 | * @return *this | |
3d1f044b | 123 | * @stable ICU 60 |
0f5d89e8 A |
124 | */ |
125 | Edits &operator=(const Edits &other); | |
126 | ||
127 | /** | |
128 | * Move assignment operator, might leave src empty. | |
129 | * This object will have the same contents that the source object had. | |
130 | * The behavior is undefined if *this and src are the same object. | |
131 | * @param src source edits | |
132 | * @return *this | |
3d1f044b | 133 | * @stable ICU 60 |
0f5d89e8 A |
134 | */ |
135 | Edits &operator=(Edits &&src) U_NOEXCEPT; | |
136 | ||
f3c0d7a5 A |
137 | /** |
138 | * Resets the data but may not release memory. | |
0f5d89e8 | 139 | * @stable ICU 59 |
f3c0d7a5 | 140 | */ |
0f5d89e8 | 141 | void reset() U_NOEXCEPT; |
f3c0d7a5 A |
142 | |
143 | /** | |
0f5d89e8 | 144 | * Adds a no-change edit: a record for an unchanged segment of text. |
f3c0d7a5 | 145 | * Normally called from inside ICU string transformation functions, not user code. |
0f5d89e8 | 146 | * @stable ICU 59 |
f3c0d7a5 A |
147 | */ |
148 | void addUnchanged(int32_t unchangedLength); | |
149 | /** | |
0f5d89e8 | 150 | * Adds a change edit: a record for a text replacement/insertion/deletion. |
f3c0d7a5 | 151 | * Normally called from inside ICU string transformation functions, not user code. |
0f5d89e8 | 152 | * @stable ICU 59 |
f3c0d7a5 A |
153 | */ |
154 | void addReplace(int32_t oldLength, int32_t newLength); | |
155 | /** | |
156 | * Sets the UErrorCode if an error occurred while recording edits. | |
157 | * Preserves older error codes in the outErrorCode. | |
158 | * Normally called from inside ICU string transformation functions, not user code. | |
0f5d89e8 A |
159 | * @param outErrorCode Set to an error code if it does not contain one already |
160 | * and an error occurred while recording edits. | |
161 | * Otherwise unchanged. | |
f3c0d7a5 | 162 | * @return TRUE if U_FAILURE(outErrorCode) |
0f5d89e8 | 163 | * @stable ICU 59 |
f3c0d7a5 | 164 | */ |
340931cb | 165 | UBool copyErrorTo(UErrorCode &outErrorCode) const; |
f3c0d7a5 A |
166 | |
167 | /** | |
168 | * How much longer is the new text compared with the old text? | |
169 | * @return new length minus old length | |
0f5d89e8 | 170 | * @stable ICU 59 |
f3c0d7a5 A |
171 | */ |
172 | int32_t lengthDelta() const { return delta; } | |
173 | /** | |
174 | * @return TRUE if there are any change edits | |
0f5d89e8 | 175 | * @stable ICU 59 |
f3c0d7a5 | 176 | */ |
0f5d89e8 A |
177 | UBool hasChanges() const { return numChanges != 0; } |
178 | ||
0f5d89e8 A |
179 | /** |
180 | * @return the number of change edits | |
3d1f044b | 181 | * @stable ICU 60 |
0f5d89e8 A |
182 | */ |
183 | int32_t numberOfChanges() const { return numChanges; } | |
f3c0d7a5 A |
184 | |
185 | /** | |
186 | * Access to the list of edits. | |
0f5d89e8 A |
187 | * |
188 | * At any moment in time, an instance of this class points to a single edit: a "window" into a span | |
189 | * of the source string and the corresponding span of the destination string. The source string span | |
190 | * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string | |
191 | * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars. | |
192 | * | |
3d1f044b A |
193 | * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`, |
194 | * and `findDestinationIndex(int32_t, UErrorCode &)` methods. | |
195 | * Calling any of these methods mutates the iterator to make it point to the corresponding edit. | |
0f5d89e8 A |
196 | * |
197 | * For more information, see the documentation for {@link Edits}. | |
198 | * | |
f3c0d7a5 A |
199 | * @see getCoarseIterator |
200 | * @see getFineIterator | |
0f5d89e8 | 201 | * @stable ICU 59 |
f3c0d7a5 A |
202 | */ |
203 | struct U_COMMON_API Iterator U_FINAL : public UMemory { | |
0f5d89e8 A |
204 | /** |
205 | * Default constructor, empty iterator. | |
3d1f044b | 206 | * @stable ICU 60 |
0f5d89e8 A |
207 | */ |
208 | Iterator() : | |
209 | array(nullptr), index(0), length(0), | |
210 | remaining(0), onlyChanges_(FALSE), coarse(FALSE), | |
211 | dir(0), changed(FALSE), oldLength_(0), newLength_(0), | |
212 | srcIndex(0), replIndex(0), destIndex(0) {} | |
f3c0d7a5 A |
213 | /** |
214 | * Copy constructor. | |
0f5d89e8 | 215 | * @stable ICU 59 |
f3c0d7a5 A |
216 | */ |
217 | Iterator(const Iterator &other) = default; | |
218 | /** | |
219 | * Assignment operator. | |
0f5d89e8 | 220 | * @stable ICU 59 |
f3c0d7a5 A |
221 | */ |
222 | Iterator &operator=(const Iterator &other) = default; | |
223 | ||
224 | /** | |
0f5d89e8 A |
225 | * Advances the iterator to the next edit. |
226 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | |
227 | * or else the function returns immediately. Check for U_FAILURE() | |
228 | * on output or use with function chaining. (See User Guide for details.) | |
f3c0d7a5 | 229 | * @return TRUE if there is another edit |
0f5d89e8 | 230 | * @stable ICU 59 |
f3c0d7a5 A |
231 | */ |
232 | UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } | |
233 | ||
234 | /** | |
0f5d89e8 A |
235 | * Moves the iterator to the edit that contains the source index. |
236 | * The source index may be found in a no-change edit | |
237 | * even if normal iteration would skip no-change edits. | |
f3c0d7a5 A |
238 | * Normal iteration can continue from a found edit. |
239 | * | |
240 | * The iterator state before this search logically does not matter. | |
241 | * (It may affect the performance of the search.) | |
242 | * | |
243 | * The iterator state after this search is undefined | |
244 | * if the source index is out of bounds for the source string. | |
245 | * | |
246 | * @param i source index | |
0f5d89e8 A |
247 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, |
248 | * or else the function returns immediately. Check for U_FAILURE() | |
249 | * on output or use with function chaining. (See User Guide for details.) | |
f3c0d7a5 | 250 | * @return TRUE if the edit for the source index was found |
0f5d89e8 A |
251 | * @stable ICU 59 |
252 | */ | |
253 | UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { | |
254 | return findIndex(i, TRUE, errorCode) == 0; | |
255 | } | |
256 | ||
0f5d89e8 A |
257 | /** |
258 | * Moves the iterator to the edit that contains the destination index. | |
259 | * The destination index may be found in a no-change edit | |
260 | * even if normal iteration would skip no-change edits. | |
261 | * Normal iteration can continue from a found edit. | |
262 | * | |
263 | * The iterator state before this search logically does not matter. | |
264 | * (It may affect the performance of the search.) | |
265 | * | |
266 | * The iterator state after this search is undefined | |
267 | * if the destination index is out of bounds for the destination string. | |
268 | * | |
269 | * @param i destination index | |
270 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | |
271 | * or else the function returns immediately. Check for U_FAILURE() | |
272 | * on output or use with function chaining. (See User Guide for details.) | |
273 | * @return TRUE if the edit for the destination index was found | |
3d1f044b | 274 | * @stable ICU 60 |
0f5d89e8 A |
275 | */ |
276 | UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) { | |
277 | return findIndex(i, FALSE, errorCode) == 0; | |
278 | } | |
279 | ||
280 | /** | |
281 | * Computes the destination index corresponding to the given source index. | |
282 | * If the source index is inside a change edit (not at its start), | |
283 | * then the destination index at the end of that edit is returned, | |
284 | * since there is no information about index mapping inside a change edit. | |
285 | * | |
286 | * (This means that indexes to the start and middle of an edit, | |
287 | * for example around a grapheme cluster, are mapped to indexes | |
288 | * encompassing the entire edit. | |
289 | * The alternative, mapping an interior index to the start, | |
290 | * would map such an interval to an empty one.) | |
291 | * | |
292 | * This operation will usually but not always modify this object. | |
293 | * The iterator state after this search is undefined. | |
294 | * | |
295 | * @param i source index | |
296 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | |
297 | * or else the function returns immediately. Check for U_FAILURE() | |
298 | * on output or use with function chaining. (See User Guide for details.) | |
299 | * @return destination index; undefined if i is not 0..string length | |
3d1f044b | 300 | * @stable ICU 60 |
f3c0d7a5 | 301 | */ |
0f5d89e8 | 302 | int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode); |
f3c0d7a5 A |
303 | |
304 | /** | |
0f5d89e8 A |
305 | * Computes the source index corresponding to the given destination index. |
306 | * If the destination index is inside a change edit (not at its start), | |
307 | * then the source index at the end of that edit is returned, | |
308 | * since there is no information about index mapping inside a change edit. | |
309 | * | |
310 | * (This means that indexes to the start and middle of an edit, | |
311 | * for example around a grapheme cluster, are mapped to indexes | |
312 | * encompassing the entire edit. | |
313 | * The alternative, mapping an interior index to the start, | |
314 | * would map such an interval to an empty one.) | |
315 | * | |
316 | * This operation will usually but not always modify this object. | |
317 | * The iterator state after this search is undefined. | |
318 | * | |
319 | * @param i destination index | |
320 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | |
321 | * or else the function returns immediately. Check for U_FAILURE() | |
322 | * on output or use with function chaining. (See User Guide for details.) | |
323 | * @return source index; undefined if i is not 0..string length | |
3d1f044b | 324 | * @stable ICU 60 |
0f5d89e8 A |
325 | */ |
326 | int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); | |
0f5d89e8 A |
327 | |
328 | /** | |
329 | * Returns whether the edit currently represented by the iterator is a change edit. | |
330 | * | |
f3c0d7a5 A |
331 | * @return TRUE if this edit replaces oldLength() units with newLength() different ones. |
332 | * FALSE if oldLength units remain unchanged. | |
0f5d89e8 | 333 | * @stable ICU 59 |
f3c0d7a5 A |
334 | */ |
335 | UBool hasChange() const { return changed; } | |
0f5d89e8 | 336 | |
f3c0d7a5 | 337 | /** |
0f5d89e8 A |
338 | * The length of the current span in the source string, which starts at {@link #sourceIndex}. |
339 | * | |
f3c0d7a5 | 340 | * @return the number of units in the original string which are replaced or remain unchanged. |
0f5d89e8 | 341 | * @stable ICU 59 |
f3c0d7a5 A |
342 | */ |
343 | int32_t oldLength() const { return oldLength_; } | |
0f5d89e8 | 344 | |
f3c0d7a5 | 345 | /** |
0f5d89e8 A |
346 | * The length of the current span in the destination string, which starts at |
347 | * {@link #destinationIndex}, or in the replacement string, which starts at | |
348 | * {@link #replacementIndex}. | |
349 | * | |
f3c0d7a5 A |
350 | * @return the number of units in the modified string, if hasChange() is TRUE. |
351 | * Same as oldLength if hasChange() is FALSE. | |
0f5d89e8 | 352 | * @stable ICU 59 |
f3c0d7a5 A |
353 | */ |
354 | int32_t newLength() const { return newLength_; } | |
355 | ||
356 | /** | |
0f5d89e8 A |
357 | * The start index of the current span in the source string; the span has length |
358 | * {@link #oldLength}. | |
359 | * | |
f3c0d7a5 | 360 | * @return the current index into the source string |
0f5d89e8 | 361 | * @stable ICU 59 |
f3c0d7a5 A |
362 | */ |
363 | int32_t sourceIndex() const { return srcIndex; } | |
0f5d89e8 | 364 | |
f3c0d7a5 | 365 | /** |
0f5d89e8 A |
366 | * The start index of the current span in the replacement string; the span has length |
367 | * {@link #newLength}. Well-defined only if the current edit is a change edit. | |
3d1f044b A |
368 | * |
369 | * The *replacement string* is the concatenation of all substrings of the destination | |
0f5d89e8 | 370 | * string corresponding to change edits. |
3d1f044b | 371 | * |
0f5d89e8 | 372 | * This method is intended to be used together with operations that write only replacement |
3d1f044b A |
373 | * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option). |
374 | * The source string can then be modified in-place. | |
0f5d89e8 | 375 | * |
f3c0d7a5 A |
376 | * @return the current index into the replacement-characters-only string, |
377 | * not counting unchanged spans | |
0f5d89e8 | 378 | * @stable ICU 59 |
f3c0d7a5 | 379 | */ |
0f5d89e8 A |
380 | int32_t replacementIndex() const { |
381 | // TODO: Throw an exception if we aren't in a change edit? | |
382 | return replIndex; | |
383 | } | |
384 | ||
f3c0d7a5 | 385 | /** |
0f5d89e8 A |
386 | * The start index of the current span in the destination string; the span has length |
387 | * {@link #newLength}. | |
388 | * | |
f3c0d7a5 | 389 | * @return the current index into the full destination string |
0f5d89e8 | 390 | * @stable ICU 59 |
f3c0d7a5 A |
391 | */ |
392 | int32_t destinationIndex() const { return destIndex; } | |
393 | ||
0f5d89e8 A |
394 | #ifndef U_HIDE_INTERNAL_API |
395 | /** | |
396 | * A string representation of the current edit represented by the iterator for debugging. You | |
397 | * should not depend on the contents of the return string. | |
398 | * @internal | |
399 | */ | |
400 | UnicodeString& toString(UnicodeString& appendTo) const; | |
401 | #endif // U_HIDE_INTERNAL_API | |
402 | ||
f3c0d7a5 A |
403 | private: |
404 | friend class Edits; | |
405 | ||
406 | Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); | |
407 | ||
408 | int32_t readLength(int32_t head); | |
0f5d89e8 A |
409 | void updateNextIndexes(); |
410 | void updatePreviousIndexes(); | |
f3c0d7a5 A |
411 | UBool noNext(); |
412 | UBool next(UBool onlyChanges, UErrorCode &errorCode); | |
0f5d89e8 A |
413 | UBool previous(UErrorCode &errorCode); |
414 | /** @return -1: error or i<0; 0: found; 1: i>=string length */ | |
415 | int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode); | |
f3c0d7a5 A |
416 | |
417 | const uint16_t *array; | |
418 | int32_t index, length; | |
0f5d89e8 A |
419 | // 0 if we are not within compressed equal-length changes. |
420 | // Otherwise the number of remaining changes, including the current one. | |
f3c0d7a5 A |
421 | int32_t remaining; |
422 | UBool onlyChanges_, coarse; | |
423 | ||
0f5d89e8 | 424 | int8_t dir; // iteration direction: back(<0), initial(0), forward(>0) |
f3c0d7a5 A |
425 | UBool changed; |
426 | int32_t oldLength_, newLength_; | |
427 | int32_t srcIndex, replIndex, destIndex; | |
428 | }; | |
429 | ||
430 | /** | |
0f5d89e8 A |
431 | * Returns an Iterator for coarse-grained change edits |
432 | * (adjacent change edits are treated as one). | |
433 | * Can be used to perform simple string updates. | |
434 | * Skips no-change edits. | |
f3c0d7a5 | 435 | * @return an Iterator that merges adjacent changes. |
0f5d89e8 | 436 | * @stable ICU 59 |
f3c0d7a5 A |
437 | */ |
438 | Iterator getCoarseChangesIterator() const { | |
439 | return Iterator(array, length, TRUE, TRUE); | |
440 | } | |
441 | ||
442 | /** | |
0f5d89e8 A |
443 | * Returns an Iterator for coarse-grained change and no-change edits |
444 | * (adjacent change edits are treated as one). | |
445 | * Can be used to perform simple string updates. | |
446 | * Does not skip no-change edits. | |
f3c0d7a5 | 447 | * @return an Iterator that merges adjacent changes. |
0f5d89e8 | 448 | * @stable ICU 59 |
f3c0d7a5 A |
449 | */ |
450 | Iterator getCoarseIterator() const { | |
451 | return Iterator(array, length, FALSE, TRUE); | |
452 | } | |
453 | ||
454 | /** | |
0f5d89e8 A |
455 | * Returns an Iterator for fine-grained change edits |
456 | * (full granularity of change edits is retained). | |
457 | * Can be used for modifying styled text. | |
458 | * Skips no-change edits. | |
f3c0d7a5 | 459 | * @return an Iterator that separates adjacent changes. |
0f5d89e8 | 460 | * @stable ICU 59 |
f3c0d7a5 A |
461 | */ |
462 | Iterator getFineChangesIterator() const { | |
463 | return Iterator(array, length, TRUE, FALSE); | |
464 | } | |
465 | ||
466 | /** | |
0f5d89e8 A |
467 | * Returns an Iterator for fine-grained change and no-change edits |
468 | * (full granularity of change edits is retained). | |
469 | * Can be used for modifying styled text. | |
f3c0d7a5 | 470 | * @return an Iterator that separates adjacent changes. |
0f5d89e8 | 471 | * @stable ICU 59 |
f3c0d7a5 A |
472 | */ |
473 | Iterator getFineIterator() const { | |
474 | return Iterator(array, length, FALSE, FALSE); | |
475 | } | |
476 | ||
0f5d89e8 A |
477 | /** |
478 | * Merges the two input Edits and appends the result to this object. | |
479 | * | |
480 | * Consider two string transformations (for example, normalization and case mapping) | |
481 | * where each records Edits in addition to writing an output string.<br> | |
482 | * Edits ab reflect how substrings of input string a | |
483 | * map to substrings of intermediate string b.<br> | |
484 | * Edits bc reflect how substrings of intermediate string b | |
485 | * map to substrings of output string c.<br> | |
486 | * This function merges ab and bc such that the additional edits | |
487 | * recorded in this object reflect how substrings of input string a | |
488 | * map to substrings of output string c. | |
489 | * | |
490 | * If unrelated Edits are passed in where the output string of the first | |
491 | * has a different length than the input string of the second, | |
492 | * then a U_ILLEGAL_ARGUMENT_ERROR is reported. | |
493 | * | |
494 | * @param ab reflects how substrings of input string a | |
495 | * map to substrings of intermediate string b. | |
496 | * @param bc reflects how substrings of intermediate string b | |
497 | * map to substrings of output string c. | |
498 | * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, | |
499 | * or else the function returns immediately. Check for U_FAILURE() | |
500 | * on output or use with function chaining. (See User Guide for details.) | |
501 | * @return *this, with the merged edits appended | |
3d1f044b | 502 | * @stable ICU 60 |
0f5d89e8 A |
503 | */ |
504 | Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); | |
0f5d89e8 | 505 | |
f3c0d7a5 | 506 | private: |
0f5d89e8 A |
507 | void releaseArray() U_NOEXCEPT; |
508 | Edits &copyArray(const Edits &other);  // helper called by the copy constructor | |
509 | Edits &moveArray(Edits &src) U_NOEXCEPT; | |
f3c0d7a5 A |
510 | |
511 | void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } | |
512 | int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } | |
513 | ||
514 | void append(int32_t r); | |
515 | UBool growArray(); | |
516 | ||
517 | static const int32_t STACK_CAPACITY = 100; | |
518 | uint16_t *array; | |
519 | int32_t capacity; | |
520 | int32_t length; | |
521 | int32_t delta; | |
0f5d89e8 A |
522 | int32_t numChanges; |
523 | UErrorCode errorCode_; | |
f3c0d7a5 A |
524 | uint16_t stackArray[STACK_CAPACITY]; |
525 | }; | |
526 | ||
f3c0d7a5 | 527 | U_NAMESPACE_END |
340931cb A |
528 | |
529 | #endif /* U_SHOW_CPLUSPLUS_API */ | |
f3c0d7a5 A |
530 | |
531 | #endif // __EDITS_H__ |