// Source: git.saurik.com Git - apple/icu.git, blob icuSources/common/unicode/edits.h
// Release archive: ICU-59117.0.1.tar.gz
// Path: [apple/icu.git] / icuSources / common / unicode / edits.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // edits.h
5 // created: 2016dec30 Markus W. Scherer
6
7 #ifndef __EDITS_H__
8 #define __EDITS_H__
9
10 #include "unicode/utypes.h"
11 #include "unicode/uobject.h"
12
13 /**
14 * \file
15 * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
16 */
17
18 #if U_SHOW_CPLUSPLUS_API
19 U_NAMESPACE_BEGIN
20
21 #ifndef U_HIDE_DRAFT_API
22
23 /**
24 * Records lengths of string edits but not replacement text.
25 * Supports replacements, insertions, deletions in linear progression.
26 * Does not support moving/reordering of text.
27 *
28 * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
29 * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
30 *
31 * @draft ICU 59
32 */
33 class U_COMMON_API Edits U_FINAL : public UMemory {
34 public:
35 /**
36 * Constructs an empty object.
37 * @draft ICU 59
38 */
39 Edits() :
40 array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
41 errorCode(U_ZERO_ERROR) {}
42 /**
43 * Destructor.
44 * @draft ICU 59
45 */
46 ~Edits();
47
48 /**
49 * Resets the data but may not release memory.
50 * @draft ICU 59
51 */
52 void reset();
53
54 /**
55 * Adds a record for an unchanged segment of text.
56 * Normally called from inside ICU string transformation functions, not user code.
57 * @draft ICU 59
58 */
59 void addUnchanged(int32_t unchangedLength);
60 /**
61 * Adds a record for a text replacement/insertion/deletion.
62 * Normally called from inside ICU string transformation functions, not user code.
63 * @draft ICU 59
64 */
65 void addReplace(int32_t oldLength, int32_t newLength);
66 /**
67 * Sets the UErrorCode if an error occurred while recording edits.
68 * Preserves older error codes in the outErrorCode.
69 * Normally called from inside ICU string transformation functions, not user code.
70 * @return TRUE if U_FAILURE(outErrorCode)
71 * @draft ICU 59
72 */
73 UBool copyErrorTo(UErrorCode &outErrorCode);
74
75 /**
76 * How much longer is the new text compared with the old text?
77 * @return new length minus old length
78 * @draft ICU 59
79 */
80 int32_t lengthDelta() const { return delta; }
81 /**
82 * @return TRUE if there are any change edits
83 * @draft ICU 59
84 */
85 UBool hasChanges() const;
86
87 /**
88 * Access to the list of edits.
89 * @see getCoarseIterator
90 * @see getFineIterator
91 * @draft ICU 59
92 */
93 struct U_COMMON_API Iterator U_FINAL : public UMemory {
94 /**
95 * Copy constructor.
96 * @draft ICU 59
97 */
98 Iterator(const Iterator &other) = default;
99 /**
100 * Assignment operator.
101 * @draft ICU 59
102 */
103 Iterator &operator=(const Iterator &other) = default;
104
105 /**
106 * Advances to the next edit.
107 * @return TRUE if there is another edit
108 * @draft ICU 59
109 */
110 UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
111
112 /**
113 * Finds the edit that contains the source index.
114 * The source index may be found in a non-change
115 * even if normal iteration would skip non-changes.
116 * Normal iteration can continue from a found edit.
117 *
118 * The iterator state before this search logically does not matter.
119 * (It may affect the performance of the search.)
120 *
121 * The iterator state after this search is undefined
122 * if the source index is out of bounds for the source string.
123 *
124 * @param i source index
125 * @return TRUE if the edit for the source index was found
126 * @draft ICU 59
127 */
128 UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
129
130 /**
131 * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
132 * FALSE if oldLength units remain unchanged.
133 * @draft ICU 59
134 */
135 UBool hasChange() const { return changed; }
136 /**
137 * @return the number of units in the original string which are replaced or remain unchanged.
138 * @draft ICU 59
139 */
140 int32_t oldLength() const { return oldLength_; }
141 /**
142 * @return the number of units in the modified string, if hasChange() is TRUE.
143 * Same as oldLength if hasChange() is FALSE.
144 * @draft ICU 59
145 */
146 int32_t newLength() const { return newLength_; }
147
148 /**
149 * @return the current index into the source string
150 * @draft ICU 59
151 */
152 int32_t sourceIndex() const { return srcIndex; }
153 /**
154 * @return the current index into the replacement-characters-only string,
155 * not counting unchanged spans
156 * @draft ICU 59
157 */
158 int32_t replacementIndex() const { return replIndex; }
159 /**
160 * @return the current index into the full destination string
161 * @draft ICU 59
162 */
163 int32_t destinationIndex() const { return destIndex; }
164
165 private:
166 friend class Edits;
167
168 Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
169
170 int32_t readLength(int32_t head);
171 void updateIndexes();
172 UBool noNext();
173 UBool next(UBool onlyChanges, UErrorCode &errorCode);
174
175 const uint16_t *array;
176 int32_t index, length;
177 int32_t remaining;
178 UBool onlyChanges_, coarse;
179
180 UBool changed;
181 int32_t oldLength_, newLength_;
182 int32_t srcIndex, replIndex, destIndex;
183 };
184
185 /**
186 * Returns an Iterator for coarse-grained changes for simple string updates.
187 * Skips non-changes.
188 * @return an Iterator that merges adjacent changes.
189 * @draft ICU 59
190 */
191 Iterator getCoarseChangesIterator() const {
192 return Iterator(array, length, TRUE, TRUE);
193 }
194
195 /**
196 * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
197 * @return an Iterator that merges adjacent changes.
198 * @draft ICU 59
199 */
200 Iterator getCoarseIterator() const {
201 return Iterator(array, length, FALSE, TRUE);
202 }
203
204 /**
205 * Returns an Iterator for fine-grained changes for modifying styled text.
206 * Skips non-changes.
207 * @return an Iterator that separates adjacent changes.
208 * @draft ICU 59
209 */
210 Iterator getFineChangesIterator() const {
211 return Iterator(array, length, TRUE, FALSE);
212 }
213
214 /**
215 * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
216 * @return an Iterator that separates adjacent changes.
217 * @draft ICU 59
218 */
219 Iterator getFineIterator() const {
220 return Iterator(array, length, FALSE, FALSE);
221 }
222
223 private:
224 Edits(const Edits &) = delete;
225 Edits &operator=(const Edits &) = delete;
226
227 void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
228 int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
229
230 void append(int32_t r);
231 UBool growArray();
232
233 static const int32_t STACK_CAPACITY = 100;
234 uint16_t *array;
235 int32_t capacity;
236 int32_t length;
237 int32_t delta;
238 UErrorCode errorCode;
239 uint16_t stackArray[STACK_CAPACITY];
240 };
241
242 #endif // U_HIDE_DRAFT_API
243
244 U_NAMESPACE_END
245 #endif // U_SHOW_CPLUSPLUS_API
246
247 #endif // __EDITS_H__