]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ********************************************************************** | |
5 | * Copyright (C) 2001-2009, International Business Machines | |
6 | * Corporation and others. All Rights Reserved. | |
7 | ********************************************************************** | |
8 | * Date Name Description | |
9 | * 05/23/00 aliu Creation. | |
10 | ********************************************************************** | |
11 | */ | |
12 | ||
13 | #include <algorithm> | |
14 | #include <vector> | |
15 | #include "unicode/utypes.h" | |
16 | #include "unicode/edits.h" | |
17 | #include "unicode/unistr.h" | |
18 | #include "unicode/utf16.h" | |
19 | #include "cmemory.h" | |
20 | #include "testutil.h" | |
21 | #include "intltest.h" | |
22 | ||
23 | static const UChar HEX[] = u"0123456789ABCDEF"; | |
24 | ||
25 | UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) { | |
26 | if (ch >= 0x10000) { | |
27 | if (ch >= 0x100000) { | |
28 | buf.append(HEX[0xF&(ch>>20)]); | |
29 | } | |
30 | buf.append(HEX[0xF&(ch>>16)]); | |
31 | } | |
32 | buf.append(HEX[0xF&(ch>>12)]); | |
33 | buf.append(HEX[0xF&(ch>>8)]); | |
34 | buf.append(HEX[0xF&(ch>>4)]); | |
35 | buf.append(HEX[0xF&ch]); | |
36 | return buf; | |
37 | } | |
38 | ||
39 | UnicodeString TestUtility::hex(UChar32 ch) { | |
40 | UnicodeString buf; | |
41 | appendHex(buf, ch); | |
42 | return buf; | |
43 | } | |
44 | ||
45 | UnicodeString TestUtility::hex(const UnicodeString& s) { | |
46 | return hex(s, u','); | |
47 | } | |
48 | ||
49 | UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) { | |
50 | UnicodeString result; | |
51 | if (s.isEmpty()) return result; | |
52 | UChar32 c; | |
53 | for (int32_t i = 0; i < s.length(); i += U16_LENGTH(c)) { | |
54 | c = s.char32At(i); | |
55 | if (i > 0) { | |
56 | result.append(sep); | |
57 | } | |
58 | appendHex(result, c); | |
59 | } | |
60 | return result; | |
61 | } | |
62 | ||
63 | UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) { | |
64 | UnicodeString buf; | |
65 | for (int32_t i = 0; i < len; ++i) { | |
66 | buf.append(HEX[0x0F & (bytes[i] >> 4)]); | |
67 | buf.append(HEX[0x0F & bytes[i]]); | |
68 | } | |
69 | return buf; | |
70 | } | |
71 | ||
72 | namespace { | |
73 | ||
74 | UnicodeString printOneEdit(const Edits::Iterator &ei) { | |
75 | if (ei.hasChange()) { | |
76 | return UnicodeString() + ei.oldLength() + u"->" + ei.newLength(); | |
77 | } else { | |
78 | return UnicodeString() + ei.oldLength() + u"=" + ei.newLength(); | |
79 | } | |
80 | } | |
81 | ||
82 | /** | |
83 | * Maps indexes according to the expected edits. | |
84 | * A destination index can occur multiple times when there are source deletions. | |
85 | * Map according to the last occurrence, normally in a non-empty destination span. | |
86 | * Simplest is to search from the back. | |
87 | */ | |
88 | int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength, | |
89 | int32_t srcLength, int32_t destLength, int32_t index) { | |
90 | int32_t srcIndex = srcLength; | |
91 | int32_t destIndex = destLength; | |
92 | int32_t i = expLength; | |
93 | while (index < destIndex && i > 0) { | |
94 | --i; | |
95 | int32_t prevSrcIndex = srcIndex - expected[i].oldLength; | |
96 | int32_t prevDestIndex = destIndex - expected[i].newLength; | |
97 | if (index == prevDestIndex) { | |
98 | return prevSrcIndex; | |
99 | } else if (index > prevDestIndex) { | |
100 | if (expected[i].change) { | |
101 | // In a change span, map to its end. | |
102 | return srcIndex; | |
103 | } else { | |
104 | // In an unchanged span, offset within it. | |
105 | return prevSrcIndex + (index - prevDestIndex); | |
106 | } | |
107 | } | |
108 | srcIndex = prevSrcIndex; | |
109 | destIndex = prevDestIndex; | |
110 | } | |
111 | // index is outside the string. | |
112 | return srcIndex; | |
113 | } | |
114 | ||
115 | int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength, | |
116 | int32_t srcLength, int32_t destLength, int32_t index) { | |
117 | int32_t srcIndex = srcLength; | |
118 | int32_t destIndex = destLength; | |
119 | int32_t i = expLength; | |
120 | while (index < srcIndex && i > 0) { | |
121 | --i; | |
122 | int32_t prevSrcIndex = srcIndex - expected[i].oldLength; | |
123 | int32_t prevDestIndex = destIndex - expected[i].newLength; | |
124 | if (index == prevSrcIndex) { | |
125 | return prevDestIndex; | |
126 | } else if (index > prevSrcIndex) { | |
127 | if (expected[i].change) { | |
128 | // In a change span, map to its end. | |
129 | return destIndex; | |
130 | } else { | |
131 | // In an unchanged span, offset within it. | |
132 | return prevDestIndex + (index - prevSrcIndex); | |
133 | } | |
134 | } | |
135 | srcIndex = prevSrcIndex; | |
136 | destIndex = prevDestIndex; | |
137 | } | |
138 | // index is outside the string. | |
139 | return destIndex; | |
140 | } | |
141 | ||
142 | } // namespace | |
143 | ||
144 | // For debugging, set -v to see matching edits up to a failure. | |
145 | UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name, | |
146 | const Edits &e1, const Edits &e2, UErrorCode &errorCode) { | |
147 | Edits::Iterator ei1 = e1.getFineIterator(); | |
148 | Edits::Iterator ei2 = e2.getFineIterator(); | |
149 | UBool ok = TRUE; | |
150 | for (int32_t i = 0; ok; ++i) { | |
151 | UBool ei1HasNext = ei1.next(errorCode); | |
152 | UBool ei2HasNext = ei2.next(errorCode); | |
153 | ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__, | |
154 | ei1HasNext, ei2HasNext); | |
155 | ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode); | |
156 | ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__, | |
157 | printOneEdit(ei1), printOneEdit(ei2)); | |
158 | if (!ei1HasNext || !ei2HasNext) { | |
159 | break; | |
160 | } | |
161 | test.logln(); | |
162 | } | |
163 | return ok; | |
164 | } | |
165 | ||
166 | void TestUtility::checkEditsIter( | |
167 | IntlTest &test, | |
168 | const UnicodeString &name, | |
169 | Edits::Iterator ei1, Edits::Iterator ei2, // two equal iterators | |
170 | const EditChange expected[], int32_t expLength, UBool withUnchanged, | |
171 | UErrorCode &errorCode) { | |
172 | test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode)); | |
173 | test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode)); | |
174 | ||
175 | int32_t expSrcIndex = 0; | |
176 | int32_t expDestIndex = 0; | |
177 | int32_t expReplIndex = 0; | |
178 | for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) { | |
179 | const EditChange &expect = expected[expIndex]; | |
180 | UnicodeString msg = UnicodeString(name).append(u' ') + expIndex; | |
181 | if (withUnchanged || expect.change) { | |
182 | test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode)); | |
183 | test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange()); | |
184 | test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength()); | |
185 | test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength()); | |
186 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex()); | |
187 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex()); | |
188 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex()); | |
189 | } | |
190 | ||
191 | if (expect.oldLength > 0) { | |
192 | test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode)); | |
193 | test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange()); | |
194 | test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength()); | |
195 | test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength()); | |
196 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex()); | |
197 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex()); | |
198 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex()); | |
199 | if (!withUnchanged) { | |
200 | // For some iterators, move past the current range | |
201 | // so that findSourceIndex() has to look before the current index. | |
202 | ei2.next(errorCode); | |
203 | ei2.next(errorCode); | |
204 | } | |
205 | } | |
206 | ||
207 | if (expect.newLength > 0) { | |
208 | test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode)); | |
209 | test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange()); | |
210 | test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength()); | |
211 | test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength()); | |
212 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex()); | |
213 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex()); | |
214 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex()); | |
215 | if (!withUnchanged) { | |
216 | // For some iterators, move past the current range | |
217 | // so that findSourceIndex() has to look before the current index. | |
218 | ei2.next(errorCode); | |
219 | ei2.next(errorCode); | |
220 | } | |
221 | } | |
222 | ||
223 | expSrcIndex += expect.oldLength; | |
224 | expDestIndex += expect.newLength; | |
225 | if (expect.change) { | |
226 | expReplIndex += expect.newLength; | |
227 | } | |
228 | } | |
229 | UnicodeString msg = UnicodeString(name).append(u" end"); | |
230 | test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode)); | |
231 | test.assertFalse(msg + u":" + __LINE__, ei1.hasChange()); | |
232 | test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength()); | |
233 | test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength()); | |
234 | test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex()); | |
235 | test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex()); | |
236 | test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex()); | |
237 | ||
238 | test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode)); | |
239 | test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode)); | |
240 | ||
241 | // Check mapping of all indexes against a simple implementation | |
242 | // that works on the expected changes. | |
243 | // Iterate once forward, once backward, to cover more runtime conditions. | |
244 | int32_t srcLength = expSrcIndex; | |
245 | int32_t destLength = expDestIndex; | |
246 | std::vector<int32_t> srcIndexes; | |
247 | std::vector<int32_t> destIndexes; | |
248 | srcIndexes.push_back(-1); | |
249 | destIndexes.push_back(-1); | |
250 | int32_t srcIndex = 0; | |
251 | int32_t destIndex = 0; | |
252 | for (int32_t i = 0; i < expLength; ++i) { | |
253 | if (expected[i].oldLength > 0) { | |
254 | srcIndexes.push_back(srcIndex); | |
255 | if (expected[i].oldLength > 1) { | |
256 | srcIndexes.push_back(srcIndex + 1); | |
257 | if (expected[i].oldLength > 2) { | |
258 | srcIndexes.push_back(srcIndex + expected[i].oldLength - 1); | |
259 | } | |
260 | } | |
261 | } | |
262 | if (expected[i].newLength > 0) { | |
263 | destIndexes.push_back(destIndex); | |
264 | if (expected[i].newLength > 1) { | |
265 | destIndexes.push_back(destIndex + 1); | |
266 | if (expected[i].newLength > 2) { | |
267 | destIndexes.push_back(destIndex + expected[i].newLength - 1); | |
268 | } | |
269 | } | |
270 | } | |
271 | srcIndex += expected[i].oldLength; | |
272 | destIndex += expected[i].newLength; | |
273 | } | |
274 | srcIndexes.push_back(srcLength); | |
275 | destIndexes.push_back(destLength); | |
276 | srcIndexes.push_back(srcLength + 1); | |
277 | destIndexes.push_back(destLength + 1); | |
278 | std::reverse(destIndexes.begin(), destIndexes.end()); | |
279 | // Zig-zag across the indexes to stress next() <-> previous(). | |
280 | static const int32_t ZIG_ZAG[] = { 0, 1, 2, 3, 2, 1 }; | |
281 | for (auto i = 0; i < (int32_t)srcIndexes.size(); ++i) { | |
282 | for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) { | |
283 | int32_t j = ZIG_ZAG[ij]; | |
284 | if ((i + j) < (int32_t)srcIndexes.size()) { | |
285 | int32_t si = srcIndexes[i + j]; | |
286 | test.assertEquals(name + u" destIndexFromSrc(" + si + u"):" + __LINE__, | |
287 | destIndexFromSrc(expected, expLength, srcLength, destLength, si), | |
288 | ei2.destinationIndexFromSourceIndex(si, errorCode)); | |
289 | } | |
290 | } | |
291 | } | |
292 | for (auto i = 0; i < (int32_t)destIndexes.size(); ++i) { | |
293 | for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) { | |
294 | int32_t j = ZIG_ZAG[ij]; | |
295 | if ((i + j) < (int32_t)destIndexes.size()) { | |
296 | int32_t di = destIndexes[i + j]; | |
297 | test.assertEquals(name + u" srcIndexFromDest(" + di + u"):" + __LINE__, | |
298 | srcIndexFromDest(expected, expLength, srcLength, destLength, di), | |
299 | ei2.sourceIndexFromDestinationIndex(di, errorCode)); | |
300 | } | |
301 | } | |
302 | } | |
303 | } |