1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
5 // created: 2016dec30 Markus W. Scherer
10 #include "unicode/utypes.h"
11 #include "unicode/uobject.h"
/**
 * \file
 * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
 */
18 #if U_SHOW_CPLUSPLUS_API
21 #ifndef U_HIDE_DRAFT_API
/**
 * Records lengths of string edits but not replacement text.
 * Supports replacements, insertions, deletions in linear progression.
 * Does not support moving/reordering of text.
 *
 * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
 * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
 */
33 class U_COMMON_API Edits U_FINAL
: public UMemory
{
36 * Constructs an empty object.
40 array(stackArray
), capacity(STACK_CAPACITY
), length(0), delta(0),
41 errorCode(U_ZERO_ERROR
) {}
// Resets the data but may not release memory.
/**
 * Adds a record for an unchanged segment of text.
 * Normally called from inside ICU string transformation functions, not user code.
 * @param unchangedLength length of the unchanged segment
 */
void addUnchanged(int32_t unchangedLength);
/**
 * Adds a record for a text replacement/insertion/deletion.
 * Normally called from inside ICU string transformation functions, not user code.
 * @param oldLength length of the text being replaced
 * @param newLength length of the replacement text
 */
void addReplace(int32_t oldLength, int32_t newLength);
67 * Sets the UErrorCode if an error occurred while recording edits.
68 * Preserves older error codes in the outErrorCode.
69 * Normally called from inside ICU string transformation functions, not user code.
70 * @return TRUE if U_FAILURE(outErrorCode)
73 UBool
copyErrorTo(UErrorCode
&outErrorCode
);
76 * How much longer is the new text compared with the old text?
77 * @return new length minus old length
80 int32_t lengthDelta() const { return delta
; }
82 * @return TRUE if there are any change edits
85 UBool
hasChanges() const;
/**
 * Access to the list of edits.
 * @see getCoarseIterator
 * @see getFineIterator
 */
93 struct U_COMMON_API Iterator U_FINAL
: public UMemory
{
98 Iterator(const Iterator
&other
) = default;
100 * Assignment operator.
103 Iterator
&operator=(const Iterator
&other
) = default;
106 * Advances to the next edit.
107 * @return TRUE if there is another edit
110 UBool
next(UErrorCode
&errorCode
) { return next(onlyChanges_
, errorCode
); }
113 * Finds the edit that contains the source index.
114 * The source index may be found in a non-change
115 * even if normal iteration would skip non-changes.
116 * Normal iteration can continue from a found edit.
118 * The iterator state before this search logically does not matter.
119 * (It may affect the performance of the search.)
121 * The iterator state after this search is undefined
122 * if the source index is out of bounds for the source string.
124 * @param i source index
125 * @return TRUE if the edit for the source index was found
128 UBool
findSourceIndex(int32_t i
, UErrorCode
&errorCode
);
131 * @return TRUE if this edit replaces oldLength() units with newLength() different ones.
132 * FALSE if oldLength units remain unchanged.
135 UBool
hasChange() const { return changed
; }
137 * @return the number of units in the original string which are replaced or remain unchanged.
140 int32_t oldLength() const { return oldLength_
; }
142 * @return the number of units in the modified string, if hasChange() is TRUE.
143 * Same as oldLength if hasChange() is FALSE.
146 int32_t newLength() const { return newLength_
; }
149 * @return the current index into the source string
152 int32_t sourceIndex() const { return srcIndex
; }
154 * @return the current index into the replacement-characters-only string,
155 * not counting unchanged spans
158 int32_t replacementIndex() const { return replIndex
; }
160 * @return the current index into the full destination string
163 int32_t destinationIndex() const { return destIndex
; }
168 Iterator(const uint16_t *a
, int32_t len
, UBool oc
, UBool crs
);
// NOTE(review): presumably decodes an edit length from `head` and any
// following array units -- confirm against the implementation file.
int32_t readLength(int32_t head);
// NOTE(review): presumably moves srcIndex/replIndex/destIndex past the
// current edit -- confirm against the implementation file.
void updateIndexes();
173 UBool
next(UBool onlyChanges
, UErrorCode
&errorCode
);
175 const uint16_t *array
;
176 int32_t index
, length
;
178 UBool onlyChanges_
, coarse
;
181 int32_t oldLength_
, newLength_
;
182 int32_t srcIndex
, replIndex
, destIndex
;
186 * Returns an Iterator for coarse-grained changes for simple string updates.
188 * @return an Iterator that merges adjacent changes.
191 Iterator
getCoarseChangesIterator() const {
192 return Iterator(array
, length
, TRUE
, TRUE
);
196 * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
197 * @return an Iterator that merges adjacent changes.
200 Iterator
getCoarseIterator() const {
201 return Iterator(array
, length
, FALSE
, TRUE
);
205 * Returns an Iterator for fine-grained changes for modifying styled text.
207 * @return an Iterator that separates adjacent changes.
210 Iterator
getFineChangesIterator() const {
211 return Iterator(array
, length
, TRUE
, FALSE
);
215 * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
216 * @return an Iterator that separates adjacent changes.
219 Iterator
getFineIterator() const {
220 return Iterator(array
, length
, FALSE
, FALSE
);
224 Edits(const Edits
&) = delete;
225 Edits
&operator=(const Edits
&) = delete;
227 void setLastUnit(int32_t last
) { array
[length
- 1] = (uint16_t)last
; }
228 int32_t lastUnit() const { return length
> 0 ? array
[length
- 1] : 0xffff; }
// NOTE(review): presumably appends the unit `r` to the edit array --
// confirm against the implementation file.
void append(int32_t r);
// Element count of the embedded stackArray buffer.
static const int32_t STACK_CAPACITY = 100;
238 UErrorCode errorCode
;
239 uint16_t stackArray
[STACK_CAPACITY
];
242 #endif // U_HIDE_DRAFT_API
245 #endif // U_SHOW_CPLUSPLUS_API
247 #endif // __EDITS_H__