]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ****************************************************************************** | |
2ca993e8 | 5 | * Copyright (C) 1996-2016, International Business Machines |
57a6839d | 6 | * Corporation and others. All Rights Reserved. |
729e4ab9 A |
7 | ****************************************************************************** |
8 | */ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | ||
12 | #if !UCONFIG_NO_COLLATION | |
13 | ||
14 | #include "unicode/unistr.h" | |
729e4ab9 A |
15 | #include "unicode/usearch.h" |
16 | ||
17 | #include "cmemory.h" | |
18 | #include "unicode/coll.h" | |
19 | #include "unicode/tblcoll.h" | |
20 | #include "unicode/coleitr.h" | |
21 | #include "unicode/ucoleitr.h" | |
22 | ||
23 | #include "unicode/regex.h" // TODO: make conditional on regexp being built. | |
24 | ||
25 | #include "unicode/uniset.h" | |
26 | #include "unicode/uset.h" | |
57a6839d | 27 | #include "unicode/usetiter.h" |
729e4ab9 A |
28 | #include "unicode/ustring.h" |
29 | #include "hash.h" | |
57a6839d | 30 | #include "normalizer2impl.h" |
729e4ab9 | 31 | #include "uhash.h" |
57a6839d | 32 | #include "usrchimp.h" |
4388f060 | 33 | #include "uassert.h" |
729e4ab9 | 34 | |
51004dcb | 35 | #include "colldata.h" |
729e4ab9 | 36 | |
a62d09fc | 37 | #define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type)) |
729e4ab9 | 38 | #define DELETE_ARRAY(array) uprv_free((void *) (array)) |
a62d09fc | 39 | #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0]) |
729e4ab9 | 40 | |
729e4ab9 A |
41 | CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) |
42 | : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) | |
43 | { | |
44 | UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); | |
45 | UCollationStrength strength = ucol_getStrength(coll); | |
46 | UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; | |
47 | uint32_t variableTop = ucol_getVariableTop(coll, &status); | |
48 | uint32_t strengthMask = 0; | |
49 | int32_t order; | |
50 | ||
51 | if (U_FAILURE(status)) { | |
52 | return; | |
53 | } | |
54 | ||
55 | // **** only set flag if string has Han(gul) **** | |
57a6839d | 56 | // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476 |
729e4ab9 A |
57 | |
58 | switch (strength) | |
59 | { | |
60 | default: | |
61 | strengthMask |= UCOL_TERTIARYORDERMASK; | |
2ca993e8 | 62 | U_FALLTHROUGH; |
729e4ab9 A |
63 | case UCOL_SECONDARY: |
64 | strengthMask |= UCOL_SECONDARYORDERMASK; | |
2ca993e8 | 65 | U_FALLTHROUGH; |
729e4ab9 A |
66 | case UCOL_PRIMARY: |
67 | strengthMask |= UCOL_PRIMARYORDERMASK; | |
68 | } | |
69 | ||
729e4ab9 A |
70 | ces = ceBuffer; |
71 | ||
72 | while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) { | |
73 | UBool cont = isContinuation(order); | |
74 | ||
75 | order &= strengthMask; | |
76 | ||
77 | if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) { | |
78 | if (strength >= UCOL_QUATERNARY) { | |
79 | order &= UCOL_PRIMARYORDERMASK; | |
80 | } else { | |
81 | order = UCOL_IGNORABLE; | |
82 | } | |
83 | } | |
84 | ||
85 | if (order == UCOL_IGNORABLE) { | |
86 | continue; | |
87 | } | |
88 | ||
89 | if (cont) { | |
90 | order |= UCOL_CONTINUATION_MARKER; | |
91 | } | |
92 | ||
93 | add(order, status); | |
94 | } | |
95 | ||
96 | ucol_closeElements(elems); | |
97 | } | |
98 | ||
99 | CEList::~CEList() | |
100 | { | |
729e4ab9 A |
101 | if (ces != ceBuffer) { |
102 | DELETE_ARRAY(ces); | |
103 | } | |
104 | } | |
105 | ||
106 | void CEList::add(uint32_t ce, UErrorCode &status) | |
107 | { | |
108 | if (U_FAILURE(status)) { | |
109 | return; | |
110 | } | |
111 | ||
112 | if (listSize >= listMax) { | |
113 | int32_t newMax = listMax + CELIST_BUFFER_SIZE; | |
729e4ab9 A |
114 | uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax); |
115 | ||
116 | if (newCEs == NULL) { | |
117 | status = U_MEMORY_ALLOCATION_ERROR; | |
118 | return; | |
119 | } | |
120 | ||
121 | uprv_memcpy(newCEs, ces, listSize * sizeof(uint32_t)); | |
122 | ||
123 | if (ces != ceBuffer) { | |
124 | DELETE_ARRAY(ces); | |
125 | } | |
126 | ||
127 | ces = newCEs; | |
128 | listMax = newMax; | |
129 | } | |
130 | ||
131 | ces[listSize++] = ce; | |
132 | } | |
133 | ||
134 | uint32_t CEList::get(int32_t index) const | |
135 | { | |
136 | if (index >= 0 && index < listSize) { | |
137 | return ces[index]; | |
138 | } | |
139 | ||
51004dcb | 140 | return (uint32_t)UCOL_NULLORDER; |
729e4ab9 A |
141 | } |
142 | ||
143 | uint32_t &CEList::operator[](int32_t index) const | |
144 | { | |
145 | return ces[index]; | |
146 | } | |
147 | ||
148 | UBool CEList::matchesAt(int32_t offset, const CEList *other) const | |
149 | { | |
150 | if (other == NULL || listSize - offset < other->size()) { | |
151 | return FALSE; | |
152 | } | |
153 | ||
154 | for (int32_t i = offset, j = 0; j < other->size(); i += 1, j += 1) { | |
155 | if (ces[i] != (*other)[j]) { | |
156 | return FALSE; | |
157 | } | |
158 | } | |
159 | ||
160 | return TRUE; | |
161 | } | |
162 | ||
163 | int32_t CEList::size() const | |
164 | { | |
165 | return listSize; | |
166 | } | |
167 | ||
729e4ab9 A |
168 | StringList::StringList(UErrorCode &status) |
169 | : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0) | |
170 | { | |
171 | if (U_FAILURE(status)) { | |
172 | return; | |
173 | } | |
174 | ||
175 | strings = new UnicodeString [listMax]; | |
176 | ||
177 | if (strings == NULL) { | |
178 | status = U_MEMORY_ALLOCATION_ERROR; | |
179 | return; | |
180 | } | |
729e4ab9 A |
181 | } |
182 | ||
183 | StringList::~StringList() | |
184 | { | |
185 | delete[] strings; | |
186 | } | |
187 | ||
188 | void StringList::add(const UnicodeString *string, UErrorCode &status) | |
189 | { | |
190 | if (U_FAILURE(status)) { | |
191 | return; | |
192 | } | |
729e4ab9 A |
193 | if (listSize >= listMax) { |
194 | int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE; | |
729e4ab9 | 195 | UnicodeString *newStrings = new UnicodeString[newMax]; |
4388f060 A |
196 | if (newStrings == NULL) { |
197 | status = U_MEMORY_ALLOCATION_ERROR; | |
198 | return; | |
199 | } | |
200 | for (int32_t i=0; i<listSize; ++i) { | |
201 | newStrings[i] = strings[i]; | |
202 | } | |
729e4ab9 A |
203 | delete[] strings; |
204 | strings = newStrings; | |
205 | listMax = newMax; | |
206 | } | |
207 | ||
208 | // The ctor initialized all the strings in | |
209 | // the array to empty strings, so this | |
210 | // is the same as copying the source string. | |
211 | strings[listSize++].append(*string); | |
212 | } | |
213 | ||
214 | void StringList::add(const UChar *chars, int32_t count, UErrorCode &status) | |
215 | { | |
216 | const UnicodeString string(chars, count); | |
217 | ||
218 | add(&string, status); | |
219 | } | |
220 | ||
221 | const UnicodeString *StringList::get(int32_t index) const | |
222 | { | |
223 | if (index >= 0 && index < listSize) { | |
224 | return &strings[index]; | |
225 | } | |
226 | ||
227 | return NULL; | |
228 | } | |
229 | ||
230 | int32_t StringList::size() const | |
231 | { | |
232 | return listSize; | |
233 | } | |
234 | ||
235 | ||
51004dcb A |
236 | U_CDECL_BEGIN |
237 | static void U_CALLCONV | |
238 | deleteStringList(void *obj) | |
239 | { | |
240 | StringList *strings = (StringList *) obj; | |
241 | ||
242 | delete strings; | |
243 | } | |
244 | U_CDECL_END | |
729e4ab9 | 245 | |
51004dcb | 246 | class CEToStringsMap |
729e4ab9 A |
247 | { |
248 | public: | |
729e4ab9 A |
249 | CEToStringsMap(UErrorCode &status); |
250 | ~CEToStringsMap(); | |
251 | ||
252 | void put(uint32_t ce, UnicodeString *string, UErrorCode &status); | |
253 | StringList *getStringList(uint32_t ce) const; | |
254 | ||
255 | private: | |
729e4ab9 A |
256 | void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status); |
257 | UHashtable *map; | |
258 | }; | |
259 | ||
260 | CEToStringsMap::CEToStringsMap(UErrorCode &status) | |
261 | : map(NULL) | |
262 | { | |
263 | if (U_FAILURE(status)) { | |
264 | return; | |
265 | } | |
266 | ||
267 | map = uhash_open(uhash_hashLong, uhash_compareLong, | |
268 | uhash_compareCaselessUnicodeString, | |
269 | &status); | |
270 | ||
271 | if (U_FAILURE(status)) { | |
272 | return; | |
273 | } | |
274 | ||
275 | uhash_setValueDeleter(map, deleteStringList); | |
276 | } | |
277 | ||
278 | CEToStringsMap::~CEToStringsMap() | |
279 | { | |
280 | uhash_close(map); | |
281 | } | |
282 | ||
283 | void CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status) | |
284 | { | |
285 | StringList *strings = getStringList(ce); | |
286 | ||
287 | if (strings == NULL) { | |
288 | strings = new StringList(status); | |
289 | ||
290 | if (strings == NULL || U_FAILURE(status)) { | |
291 | status = U_MEMORY_ALLOCATION_ERROR; | |
292 | return; | |
293 | } | |
294 | ||
295 | putStringList(ce, strings, status); | |
296 | } | |
297 | ||
298 | strings->add(string, status); | |
299 | } | |
300 | ||
301 | StringList *CEToStringsMap::getStringList(uint32_t ce) const | |
302 | { | |
303 | return (StringList *) uhash_iget(map, ce); | |
304 | } | |
305 | ||
306 | void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status) | |
307 | { | |
308 | uhash_iput(map, ce, (void *) stringList, &status); | |
309 | } | |
310 | ||
729e4ab9 A |
311 | #define CLONE_COLLATOR |
312 | ||
51004dcb A |
313 | CollData::CollData(UCollator *collator, UErrorCode &status) |
314 | : coll(NULL), ceToCharsStartingWith(NULL) | |
729e4ab9 A |
315 | { |
316 | // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]] | |
317 | // i.e. other, control, private use, format, surrogate | |
318 | U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20); | |
319 | U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20); | |
320 | USet *charsToTest = uset_openPattern(test_pattern, 20, &status); | |
321 | ||
322 | // Han ext. A, Han, Jamo, Hangul, Han Ext. B | |
323 | // i.e. all the characers we handle implicitly | |
324 | U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70); | |
325 | U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70); | |
326 | USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status); | |
327 | ||
328 | if (U_FAILURE(status)) { | |
329 | return; | |
330 | } | |
331 | ||
332 | USet *expansions = uset_openEmpty(); | |
333 | USet *contractions = uset_openEmpty(); | |
334 | int32_t itemCount; | |
335 | ||
729e4ab9 A |
336 | ceToCharsStartingWith = new CEToStringsMap(status); |
337 | ||
338 | if (U_FAILURE(status)) { | |
339 | goto bail; | |
340 | } | |
341 | ||
729e4ab9 A |
342 | #ifdef CLONE_COLLATOR |
343 | coll = ucol_safeClone(collator, NULL, NULL, &status); | |
344 | ||
345 | if (U_FAILURE(status)) { | |
346 | goto bail; | |
347 | } | |
348 | #else | |
349 | coll = collator; | |
350 | #endif | |
351 | ||
352 | ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); | |
353 | ||
354 | uset_addAll(charsToTest, contractions); | |
355 | uset_addAll(charsToTest, expansions); | |
356 | uset_removeAll(charsToTest, charsToRemove); | |
357 | ||
358 | itemCount = uset_getItemCount(charsToTest); | |
359 | for(int32_t item = 0; item < itemCount; item += 1) { | |
360 | UChar32 start = 0, end = 0; | |
361 | UChar buffer[16]; | |
362 | int32_t len = uset_getItem(charsToTest, item, &start, &end, | |
363 | buffer, 16, &status); | |
364 | ||
365 | if (len == 0) { | |
366 | for (UChar32 ch = start; ch <= end; ch += 1) { | |
367 | UnicodeString *st = new UnicodeString(ch); | |
368 | ||
369 | if (st == NULL) { | |
370 | status = U_MEMORY_ALLOCATION_ERROR; | |
371 | break; | |
372 | } | |
373 | ||
374 | CEList *ceList = new CEList(coll, *st, status); | |
375 | ||
376 | ceToCharsStartingWith->put(ceList->get(0), st, status); | |
377 | ||
729e4ab9 A |
378 | delete ceList; |
379 | delete st; | |
729e4ab9 A |
380 | } |
381 | } else if (len > 0) { | |
382 | UnicodeString *st = new UnicodeString(buffer, len); | |
383 | ||
384 | if (st == NULL) { | |
385 | status = U_MEMORY_ALLOCATION_ERROR; | |
386 | break; | |
387 | } | |
388 | ||
389 | CEList *ceList = new CEList(coll, *st, status); | |
390 | ||
391 | ceToCharsStartingWith->put(ceList->get(0), st, status); | |
392 | ||
729e4ab9 A |
393 | delete ceList; |
394 | delete st; | |
729e4ab9 A |
395 | } else { |
396 | // shouldn't happen... | |
397 | } | |
398 | ||
399 | if (U_FAILURE(status)) { | |
400 | break; | |
401 | } | |
402 | } | |
403 | ||
404 | bail: | |
405 | uset_close(contractions); | |
406 | uset_close(expansions); | |
407 | uset_close(charsToRemove); | |
408 | uset_close(charsToTest); | |
409 | ||
410 | if (U_FAILURE(status)) { | |
411 | return; | |
412 | } | |
413 | ||
57a6839d A |
414 | UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status); |
415 | if (U_FAILURE(status)) { | |
416 | return; | |
417 | } | |
418 | UnicodeSetIterator hanIter(hanRanges); | |
419 | UnicodeString hanString; | |
420 | while(hanIter.nextRange()) { | |
421 | hanString.append(hanIter.getCodepoint()); | |
422 | hanString.append(hanIter.getCodepointEnd()); | |
423 | } | |
424 | // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9, | |
425 | // but as of Unicode 6.3 the 11xx block is filled, | |
426 | // and there are also more Jamo T at U+D7CB..U+D7FB. | |
427 | // Maybe use [:HST=T:] and look for the end of the last range? | |
428 | // Maybe use script boundary mappings instead of this code?? | |
429 | UChar jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAMO_T_BASE + 1, 0x11FF}; | |
2ca993e8 | 430 | UnicodeString jamoString(FALSE, jamoRanges, UPRV_LENGTHOF(jamoRanges)); |
729e4ab9 A |
431 | CEList hanList(coll, hanString, status); |
432 | CEList jamoList(coll, jamoString, status); | |
433 | int32_t j = 0; | |
434 | ||
435 | if (U_FAILURE(status)) { | |
436 | return; | |
437 | } | |
438 | ||
439 | for (int32_t c = 0; c < jamoList.size(); c += 1) { | |
440 | uint32_t jce = jamoList[c]; | |
441 | ||
442 | if (! isContinuation(jce)) { | |
443 | jamoLimits[j++] = jce; | |
444 | } | |
445 | } | |
446 | ||
447 | jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT); | |
448 | ||
449 | minHan = 0xFFFFFFFF; | |
450 | maxHan = 0; | |
451 | ||
452 | for(int32_t h = 0; h < hanList.size(); h += 2) { | |
453 | uint32_t han = (uint32_t) hanList[h]; | |
454 | ||
455 | if (han < minHan) { | |
456 | minHan = han; | |
457 | } | |
458 | ||
459 | if (han > maxHan) { | |
460 | maxHan = han; | |
461 | } | |
462 | } | |
463 | ||
464 | maxHan += (1 << UCOL_PRIMARYORDERSHIFT); | |
465 | } | |
466 | ||
467 | CollData::~CollData() | |
468 | { | |
469 | #ifdef CLONE_COLLATOR | |
470 | ucol_close(coll); | |
471 | #endif | |
472 | ||
729e4ab9 | 473 | delete ceToCharsStartingWith; |
729e4ab9 A |
474 | } |
475 | ||
476 | UCollator *CollData::getCollator() const | |
477 | { | |
478 | return coll; | |
479 | } | |
480 | ||
481 | const StringList *CollData::getStringList(int32_t ce) const | |
482 | { | |
483 | return ceToCharsStartingWith->getStringList(ce); | |
484 | } | |
485 | ||
486 | const CEList *CollData::getCEList(const UnicodeString *string) const | |
487 | { | |
729e4ab9 A |
488 | UErrorCode status = U_ZERO_ERROR; |
489 | const CEList *list = new CEList(coll, *string, status); | |
490 | ||
491 | if (U_FAILURE(status)) { | |
492 | delete list; | |
493 | list = NULL; | |
494 | } | |
495 | ||
496 | return list; | |
729e4ab9 A |
497 | } |
498 | ||
499 | void CollData::freeCEList(const CEList *list) | |
500 | { | |
729e4ab9 | 501 | delete list; |
729e4ab9 A |
502 | } |
503 | ||
504 | int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const | |
505 | { | |
506 | // find out shortest string for the longest sequence of ces. | |
507 | // this can probably be folded with the minLengthCache... | |
508 | ||
509 | if (history[offset] >= 0) { | |
510 | return history[offset]; | |
511 | } | |
512 | ||
513 | uint32_t ce = ceList->get(offset); | |
514 | int32_t maxOffset = ceList->size(); | |
515 | int32_t shortestLength = INT32_MAX; | |
516 | const StringList *strings = ceToCharsStartingWith->getStringList(ce); | |
517 | ||
518 | if (strings != NULL) { | |
519 | int32_t stringCount = strings->size(); | |
520 | ||
521 | for (int32_t s = 0; s < stringCount; s += 1) { | |
522 | const UnicodeString *string = strings->get(s); | |
729e4ab9 A |
523 | UErrorCode status = U_ZERO_ERROR; |
524 | const CEList *ceList2 = new CEList(coll, *string, status); | |
525 | ||
526 | if (U_FAILURE(status)) { | |
527 | delete ceList2; | |
528 | ceList2 = NULL; | |
529 | } | |
729e4ab9 A |
530 | |
531 | if (ceList->matchesAt(offset, ceList2)) { | |
4388f060 | 532 | U_ASSERT(ceList2 != NULL); |
729e4ab9 A |
533 | int32_t clength = ceList2->size(); |
534 | int32_t slength = string->length(); | |
535 | int32_t roffset = offset + clength; | |
536 | int32_t rlength = 0; | |
537 | ||
538 | if (roffset < maxOffset) { | |
539 | rlength = minLengthInChars(ceList, roffset, history); | |
540 | ||
541 | if (rlength <= 0) { | |
542 | // delete before continue to avoid memory leak. | |
729e4ab9 | 543 | delete ceList2; |
51004dcb | 544 | |
729e4ab9 A |
545 | // ignore any dead ends |
546 | continue; | |
547 | } | |
548 | } | |
549 | ||
550 | if (shortestLength > slength + rlength) { | |
551 | shortestLength = slength + rlength; | |
552 | } | |
553 | } | |
554 | ||
729e4ab9 | 555 | delete ceList2; |
729e4ab9 A |
556 | } |
557 | } | |
558 | ||
559 | if (shortestLength == INT32_MAX) { | |
560 | // No matching strings at this offset. See if | |
561 | // the CE is in a range we can handle manually. | |
562 | if (ce >= minHan && ce < maxHan) { | |
563 | // all han have implicit orders which | |
564 | // generate two CEs. | |
565 | int32_t roffset = offset + 2; | |
566 | int32_t rlength = 0; | |
567 | ||
568 | //history[roffset++] = -1; | |
569 | //history[roffset++] = 1; | |
570 | ||
571 | if (roffset < maxOffset) { | |
572 | rlength = minLengthInChars(ceList, roffset, history); | |
573 | } | |
574 | ||
575 | if (rlength < 0) { | |
576 | return -1; | |
577 | } | |
578 | ||
579 | shortestLength = 1 + rlength; | |
580 | goto have_shortest; | |
581 | } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) { | |
582 | int32_t roffset = offset; | |
583 | int32_t rlength = 0; | |
584 | ||
585 | // **** this loop may not handle archaic Hangul correctly **** | |
586 | for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset += 1) { | |
587 | uint32_t jce = ceList->get(roffset); | |
588 | ||
589 | // Some Jamo have 24-bit primary order; skip the | |
590 | // 2nd CE. This should always be OK because if | |
591 | // we're still in the loop all we've seen are | |
592 | // a series of Jamo in LVT order. | |
593 | if (isContinuation(jce)) { | |
594 | continue; | |
595 | } | |
596 | ||
597 | if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) { | |
598 | break; | |
599 | } | |
600 | } | |
601 | ||
602 | if (roffset == offset) { | |
603 | // we started with a non-L Jamo... | |
604 | // just say it comes from a single character | |
605 | roffset += 1; | |
606 | ||
607 | // See if the single Jamo has a 24-bit order. | |
608 | if (roffset < maxOffset && isContinuation(ceList->get(roffset))) { | |
609 | roffset += 1; | |
610 | } | |
611 | } | |
612 | ||
613 | if (roffset < maxOffset) { | |
614 | rlength = minLengthInChars(ceList, roffset, history); | |
615 | } | |
616 | ||
617 | if (rlength < 0) { | |
618 | return -1; | |
619 | } | |
620 | ||
621 | shortestLength = 1 + rlength; | |
622 | goto have_shortest; | |
623 | } | |
624 | ||
625 | // Can't handle it manually either. Just move on. | |
626 | return -1; | |
627 | } | |
628 | ||
629 | have_shortest: | |
630 | history[offset] = shortestLength; | |
631 | ||
632 | return shortestLength; | |
633 | } | |
634 | ||
635 | int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const | |
636 | { | |
637 | int32_t clength = ceList->size(); | |
638 | int32_t *history = NEW_ARRAY(int32_t, clength); | |
639 | ||
640 | for (int32_t i = 0; i < clength; i += 1) { | |
641 | history[i] = -1; | |
642 | } | |
643 | ||
644 | int32_t minLength = minLengthInChars(ceList, offset, history); | |
645 | ||
646 | DELETE_ARRAY(history); | |
647 | ||
648 | return minLength; | |
649 | } | |
650 | ||
729e4ab9 | 651 | #endif // #if !UCONFIG_NO_COLLATION |