X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/374ca955a76ecab1204ca8bfa63ff9238d998416..cecc3f9394f261e71def48cf396d137687dbd0a7:/icuSources/common/caniter.cpp diff --git a/icuSources/common/caniter.cpp b/icuSources/common/caniter.cpp index db9328c9..b28acfc8 100644 --- a/icuSources/common/caniter.cpp +++ b/icuSources/common/caniter.cpp @@ -1,7 +1,9 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ***************************************************************************** - * Copyright (C) 1996-2004, International Business Machines Corporation and * - * others. All Rights Reserved. * + * Copyright (C) 1996-2015, International Business Machines Corporation and + * others. All Rights Reserved. ***************************************************************************** */ @@ -9,14 +11,16 @@ #if !UCONFIG_NO_NORMALIZATION -#include "unicode/uset.h" -#include "unicode/ustring.h" -#include "hash.h" -#include "unormimp.h" #include "unicode/caniter.h" -#include "unicode/normlzr.h" +#include "unicode/normalizer2.h" #include "unicode/uchar.h" +#include "unicode/uniset.h" +#include "unicode/usetiter.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" #include "cmemory.h" +#include "hash.h" +#include "normalizer2impl.h" /** * This class allows one to iterate through all the strings that are canonically equivalent to a given @@ -51,29 +55,7 @@ Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMB *@author M. Davis *@draft */ -#if 0 -static UBool PROGRESS = FALSE; - -#include -#include "unicode/translit.h" - -UErrorCode status = U_ZERO_ERROR; - -// Just for testing - remove, not thread safe. -static const char* UToS(const UnicodeString &source) { - static char buffer[256]; - buffer[source.extract(0, source.length(), buffer)] = 0; - return buffer; -} -static const UnicodeString &Tr(const UnicodeString &source) { - static Transliterator *NAME = Transliterator::createInstance("name", UTRANS_FORWARD, status); - static UnicodeString result; - result = source; - NAME->transliterate(result); - return result; -} -#endif // public U_NAMESPACE_BEGIN @@ -90,9 +72,11 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode pieces_length(0), pieces_lengths(NULL), current(NULL), - current_length(0) + current_length(0), + nfd(*Normalizer2::getNFDInstance(status)), + nfcImpl(*Normalizer2Factory::getNFCImpl(status)) { - if(U_SUCCESS(status)) { + if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) { setSource(sourceStr, status); } } @@ -102,24 +86,26 @@ CanonicalIterator::~CanonicalIterator() { } void CanonicalIterator::cleanPieces() { - int32_t i = 0; - if(pieces != NULL) { - for(i = 0; i < pieces_length; i++) { - if(pieces[i] != NULL) { - delete[] pieces[i]; - } - } - uprv_free(pieces); - pieces = NULL; + int32_t i = 0; + if(pieces != NULL) { + for(i = 0; i < pieces_length; i++) { + if(pieces[i] != NULL) { + delete[] pieces[i]; + } + } + uprv_free(pieces); + pieces = NULL; + pieces_length = 0; + } if(pieces_lengths != NULL) { - uprv_free(pieces_lengths); + uprv_free(pieces_lengths); + pieces_lengths = NULL; } - pieces_lengths = NULL; if(current != NULL) { - uprv_free(current); + uprv_free(current); + current = NULL; + current_length = 0; } - current = NULL; - } } /** @@ -180,7 +166,13 @@ UnicodeString CanonicalIterator::next() { * while changing the source string, saving object creation. */ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) { - Normalizer::normalize(newSource, UNORM_NFD, 0, source, status); + int32_t list_length = 0; + UChar32 cp = 0; + int32_t start = 0; + int32_t i = 0; + UnicodeString *list = NULL; + + nfd.normalize(newSource, source, status); if(U_FAILURE(status)) { return; } @@ -190,69 +182,44 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st // catch degenerate case if (newSource.length() == 0) { - pieces_length = 1; pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *)); - /* test for NULL */ - if (pieces == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - current_length = 1; + pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); + pieces_length = 1; current = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); - /* test for NULL */ - if (current == NULL) { + current_length = 1; + if (pieces == NULL || pieces_lengths == NULL || current == NULL) { status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(pieces); - pieces = NULL; - return; + goto CleanPartialInitialization; } current[0] = 0; pieces[0] = new UnicodeString[1]; - /* test for NULL */ + pieces_lengths[0] = 1; if (pieces[0] == 0) { status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(pieces); - pieces = NULL; - uprv_free(current); - return; + goto CleanPartialInitialization; } - pieces[0][0] = UnicodeString(); - pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); - /* test for NULL */ - if (pieces_lengths == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - uprv_free(pieces); - pieces = NULL; - uprv_free(current); - return; - } - pieces_lengths[0] = 1; return; } - UnicodeString *list = new UnicodeString[source.length()]; - /* test for NULL */ + list = new UnicodeString[source.length()]; if (list == 0) { status = U_MEMORY_ALLOCATION_ERROR; - return; + goto CleanPartialInitialization; } - int32_t list_length = 0; - UChar32 cp = 0; - int32_t start = 0; // i should initialy be the number of code units at the // start of the string - int32_t i = UTF16_CHAR_LENGTH(source.char32At(0)); + i = U16_LENGTH(source.char32At(0)); //int32_t i = 1; // find the segments // This code iterates through the source string and // extracts segments that end up on a codepoint that // doesn't start any decompositions. (Analysis is done // on the NFD form - see above). - for (; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) { + for (; i < source.length(); i += U16_LENGTH(cp)) { cp = source.char32At(i); - if (unorm_isCanonSafeStart(cp)) { + if (nfcImpl.isCanonSegmentStarter(cp)) { source.extract(start, i-start, list[list_length++]); // add up to i start = i; } @@ -262,36 +229,17 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st // allocate the arrays, and find the strings that are CE to each segment pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *)); - /* test for NULL */ - if (pieces == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - delete[] list; - return; - } pieces_length = list_length; pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t)); - /* test for NULL */ - if (pieces_lengths == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - delete[] list; - uprv_free(pieces); - pieces = NULL; - return; - } - - current_length = list_length; current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t)); - /* test for NULL */ - if (current == 0) { + current_length = list_length; + if (pieces == NULL || pieces_lengths == NULL || current == NULL) { status = U_MEMORY_ALLOCATION_ERROR; - delete[] list; - uprv_free(pieces); - pieces = NULL; - uprv_free(pieces_lengths); - return; + goto CleanPartialInitialization; } + for (i = 0; i < current_length; i++) { - current[i] = 0; + current[i] = 0; } // for each segment, get all the combinations that can produce // it after NFD normalization @@ -301,6 +249,13 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st } delete[] list; + return; +// Common section to cleanup all local variables and reset object variables. +CleanPartialInitialization: + if (list != NULL) { + delete[] list; + } + cleanPieces(); } /** @@ -311,7 +266,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st */ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) { if(U_FAILURE(status)) { - return; + return; } //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source))); int32_t i = 0; @@ -320,32 +275,28 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // if zero or one character, just return a set with it // we check for length < 2 to keep from counting code points all the time if (source.length() <= 2 && source.countChar32() <= 1) { - UnicodeString *toPut = new UnicodeString(source); - /* test for NULL */ - if (toPut == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - result->put(source, toPut, status); - return; + UnicodeString *toPut = new UnicodeString(source); + /* test for NULL */ + if (toPut == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + result->put(source, toPut, status); + return; } // otherwise iterate through the string, and recursively permute all the other characters UChar32 cp; - Hashtable *subpermute = new Hashtable(status); - /* test for NULL */ - if (subpermute == 0) { - status = U_MEMORY_ALLOCATION_ERROR; + Hashtable subpermute(status); + if(U_FAILURE(status)) { return; } - if (U_SUCCESS(status)) { - subpermute->setValueDeleter(uhash_deleteUnicodeString); - } + subpermute.setValueDeleter(uprv_deleteUObject); - for (i = 0; i < source.length(); i += UTF16_CHAR_LENGTH(cp)) { + for (i = 0; i < source.length(); i += U16_LENGTH(cp)) { cp = source.char32At(i); const UHashElement *ne = NULL; - int32_t el = -1; + int32_t el = UHASH_FIRST; UnicodeString subPermuteString = source; // optimization: @@ -356,37 +307,34 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros continue; } - subpermute->removeAll(); + subpermute.removeAll(); // see what the permutations of the characters before and after this one are //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); - permute(subPermuteString.replace(i, UTF16_CHAR_LENGTH(cp), NULL, 0), skipZeros, subpermute, status); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); /* Test for buffer overflows */ if(U_FAILURE(status)) { - delete subpermute; return; } - // The upper replace is destructive. The question is do we have to make a copy, or we don't care about the contents + // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents // of source at this point. // prefix this character to all of them - ne = subpermute->nextElement(el); + ne = subpermute.nextElement(el); while (ne != NULL) { - UnicodeString *permRes = (UnicodeString *)(ne->value.pointer); - UnicodeString *chStr = new UnicodeString(cp); - //test for NULL - if (chStr == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - delete subpermute; - return; - } + UnicodeString *permRes = (UnicodeString *)(ne->value.pointer); + UnicodeString *chStr = new UnicodeString(cp); + //test for NULL + if (chStr == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer)); //if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr)); result->put(*chStr, chStr, status); - ne = subpermute->nextElement(el); + ne = subpermute.nextElement(el); } } - delete subpermute; //return result; } @@ -394,188 +342,154 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) { - //private String[] getEquivalents(String segment) - - Hashtable *result = new Hashtable(status); - /* test for NULL */ - if (result == 0) { - status = U_MEMORY_ALLOCATION_ERROR; + Hashtable result(status); + Hashtable permutations(status); + Hashtable basic(status); + if (U_FAILURE(status)) { return 0; } - if (U_SUCCESS(status)) { - result->setValueDeleter(uhash_deleteUnicodeString); - } + result.setValueDeleter(uprv_deleteUObject); + permutations.setValueDeleter(uprv_deleteUObject); + basic.setValueDeleter(uprv_deleteUObject); + UChar USeg[256]; int32_t segLen = segment.extract(USeg, 256, status); - Hashtable *basic = getEquivalents2(USeg, segLen, status); - //Hashtable *basic = getEquivalents2(segment, segLen, status); + getEquivalents2(&basic, USeg, segLen, status); // now get all the permutations // add only the ones that are canonically equivalent // TODO: optimize by not permuting any class zero. - Hashtable *permutations = new Hashtable(status); - /* test for NULL */ - if (permutations == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - delete result; - delete basic; - return 0; - } - if (U_SUCCESS(status)) { - permutations->setValueDeleter(uhash_deleteUnicodeString); - } - const UHashElement *ne = NULL; - int32_t el = -1; + int32_t el = UHASH_FIRST; //Iterator it = basic.iterator(); - ne = basic->nextElement(el); + ne = basic.nextElement(el); //while (it.hasNext()) while (ne != NULL) { //String item = (String) it.next(); UnicodeString item = *((UnicodeString *)(ne->value.pointer)); - permutations->removeAll(); - permute(item, CANITER_SKIP_ZEROES, permutations, status); + permutations.removeAll(); + permute(item, CANITER_SKIP_ZEROES, &permutations, status); const UHashElement *ne2 = NULL; - int32_t el2 = -1; + int32_t el2 = UHASH_FIRST; //Iterator it2 = permutations.iterator(); - ne2 = permutations->nextElement(el2); + ne2 = permutations.nextElement(el2); //while (it2.hasNext()) while (ne2 != NULL) { //String possible = (String) it2.next(); //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer))); UnicodeString possible(*((UnicodeString *)(ne2->value.pointer))); UnicodeString attempt; - Normalizer::normalize(possible, UNORM_NFD, 0, attempt, status); + nfd.normalize(possible, attempt, status); // TODO: check if operator == is semanticaly the same as attempt.equals(segment) if (attempt==segment) { //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible))); // TODO: use the hashtable just to catch duplicates - store strings directly (somehow). - result->put(possible, new UnicodeString(possible), status); //add(possible); + result.put(possible, new UnicodeString(possible), status); //add(possible); } else { //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible))); } - ne2 = permutations->nextElement(el2); + ne2 = permutations.nextElement(el2); } - ne = basic->nextElement(el); + ne = basic.nextElement(el); } /* Test for buffer overflows */ if(U_FAILURE(status)) { - delete result; - delete permutations; - delete basic; return 0; } // convert into a String[] to clean up storage //String[] finalResult = new String[result.size()]; UnicodeString *finalResult = NULL; int32_t resultCount; - if((resultCount = result->count())) { - finalResult = new UnicodeString[resultCount]; - } else { - status = U_ILLEGAL_ARGUMENT_ERROR; - } - /* test for NULL */ - if (finalResult == 0) { - if(U_SUCCESS(status)) { - status = U_MEMORY_ALLOCATION_ERROR; - } - delete result; - delete permutations; - delete basic; - return 0; + if((resultCount = result.count()) != 0) { + finalResult = new UnicodeString[resultCount]; + if (finalResult == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + } + else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; } //result.toArray(finalResult); result_len = 0; - el = -1; - ne = result->nextElement(el); + el = UHASH_FIRST; + ne = result.nextElement(el); while(ne != NULL) { - UnicodeString finResult = *((UnicodeString *)(ne->value.pointer)); - finalResult[result_len++] = finResult; - ne = result->nextElement(el); + finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer)); + ne = result.nextElement(el); } - delete permutations; - delete basic; - delete result; return finalResult; } -Hashtable *CanonicalIterator::getEquivalents2(const UChar *segment, int32_t segLen, UErrorCode &status) { -//Hashtable *CanonicalIterator::getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status) { +Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) { - Hashtable *result = new Hashtable(status); - /* test for NULL */ - if (result == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - if (U_SUCCESS(status)) { - result->setValueDeleter(uhash_deleteUnicodeString); + if (U_FAILURE(status)) { + return NULL; } //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment))); UnicodeString toPut(segment, segLen); - result->put(toPut, new UnicodeString(toPut), status); + fillinResult->put(toPut, new UnicodeString(toPut), status); - USerializedSet starts; + UnicodeSet starts; // cycle through all the characters - UChar32 cp, end = 0; - int32_t i = 0, j; - for (i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) { + UChar32 cp; + for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) { // see if any character is at the start of some decomposition - UTF_GET_CHAR(segment, 0, i, segLen, cp); - if (!unorm_getCanonStartSet(cp, &starts)) { - continue; + U16_GET(segment, 0, i, segLen, cp); + if (!nfcImpl.getCanonStartSet(cp, starts)) { + continue; } - // if so, see which decompositions match - for(j = 0, cp = end+1; cp <= end || uset_getSerializedRange(&starts, j++, &cp, &end); ++cp) { - //Hashtable *remainder = extract(cp, segment, segLen, i, status); - Hashtable *remainder = extract(cp, segment, segLen, i, status); - if (remainder == NULL) continue; + // if so, see which decompositions match + UnicodeSetIterator iter(starts); + while (iter.next()) { + UChar32 cp2 = iter.getCodepoint(); + Hashtable remainder(status); + remainder.setValueDeleter(uprv_deleteUObject); + if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) { + continue; + } // there were some matches, so add all the possibilities to the set. UnicodeString prefix(segment, i); - prefix += cp; + prefix += cp2; - const UHashElement *ne = NULL; - int32_t el = -1; - ne = remainder->nextElement(el); + int32_t el = UHASH_FIRST; + const UHashElement *ne = remainder.nextElement(el); while (ne != NULL) { UnicodeString item = *((UnicodeString *)(ne->value.pointer)); UnicodeString *toAdd = new UnicodeString(prefix); /* test for NULL */ if (toAdd == 0) { status = U_MEMORY_ALLOCATION_ERROR; - delete result; - delete remainder; - return 0; + return NULL; } *toAdd += item; - result->put(*toAdd, toAdd, status); + fillinResult->put(*toAdd, toAdd, status); //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd))); - ne = remainder->nextElement(el); + ne = remainder.nextElement(el); } - - delete remainder; } } /* Test for buffer overflows */ if(U_FAILURE(status)) { - return 0; + return NULL; } - return result; + return fillinResult; } /** @@ -583,78 +497,55 @@ Hashtable *CanonicalIterator::getEquivalents2(const UChar *segment, int32_t segL * (with canonical rearrangment!) * If so, take the remainder, and return the equivalents */ -Hashtable *CanonicalIterator::extract(UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { +Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { //Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp)))); //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos); - const int32_t bufSize = 256; - int32_t bufLen = 0; - UChar temp[bufSize]; - - int32_t inputLen = 0, decompLen; - UChar stackBuffer[4]; - const UChar *decomp; - - U16_APPEND_UNSAFE(temp, inputLen, comp); - decomp = unorm_getCanonicalDecomposition(comp, stackBuffer, &decompLen); - if(decomp == NULL) { - /* copy temp */ - stackBuffer[0] = temp[0]; - if(inputLen > 1) { - stackBuffer[1] = temp[1]; - } - decomp = stackBuffer; - decompLen = inputLen; + if (U_FAILURE(status)) { + return NULL; } - UChar *buff = temp+inputLen; + UnicodeString temp(comp); + int32_t inputLen=temp.length(); + UnicodeString decompString; + nfd.normalize(temp, decompString, status); + if (U_FAILURE(status)) { + return NULL; + } + if (decompString.isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + const UChar *decomp=decompString.getBuffer(); + int32_t decompLen=decompString.length(); // See if it matches the start of segment (at segmentPos) UBool ok = FALSE; UChar32 cp; int32_t decompPos = 0; UChar32 decompCp; - UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp); - - int32_t i; - UBool overflow = FALSE; + U16_NEXT(decomp, decompPos, decompLen, decompCp); - i = segmentPos; + int32_t i = segmentPos; while(i < segLen) { - UTF_NEXT_CHAR(segment, i, segLen, cp); + U16_NEXT(segment, i, segLen, cp); if (cp == decompCp) { // if equal, eat another cp from decomp //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp)))); if (decompPos == decompLen) { // done, have all decomp characters! - //u_strcat(buff+bufLen, segment+i); - uprv_memcpy(buff+bufLen, segment+i, (segLen-i)*sizeof(UChar)); - bufLen+=segLen-i; - + temp.append(segment+i, segLen-i); ok = TRUE; break; } - UTF_NEXT_CHAR(decomp, decompPos, decompLen, decompCp); + U16_NEXT(decomp, decompPos, decompLen, decompCp); } else { //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp)))); // brute force approach - - U16_APPEND(buff, bufLen, bufSize, cp, overflow); - - if(overflow) { - /* - * ### TODO handle buffer overflow - * The buffer is large, but an overflow may still happen with - * unusual input (many combining marks?). - * Reallocate buffer and continue. - * markus 20020929 - */ - - overflow = FALSE; - } + temp.append(cp); /* TODO: optimize // since we know that the classes are monotonically increasing, after zero @@ -669,39 +560,25 @@ Hashtable *CanonicalIterator::extract(UChar32 comp, const UChar *segment, int32_ */ } } - if (!ok) return NULL; // we failed, characters left over + if (!ok) + return NULL; // we failed, characters left over //if (PROGRESS) printf("Matches\n"); - if (bufLen == 0) { - Hashtable *result = new Hashtable(status); - /* test for NULL */ - if (result == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - result->setValueDeleter(uhash_deleteUnicodeString); - result->put(UnicodeString(), new UnicodeString(), status); - return result; // succeed, but no remainder + if (inputLen == temp.length()) { + fillinResult->put(UnicodeString(), new UnicodeString(), status); + return fillinResult; // succeed, but no remainder } // brute force approach // check to make sure result is canonically equivalent - int32_t tempLen = inputLen + bufLen; - - UChar trial[bufSize]; - unorm_decompose(trial, bufSize, temp, tempLen, FALSE, 0, &status); - - /* Test for buffer overflows */ - if(U_FAILURE(status)) { - return 0; - } - - if(uprv_memcmp(segment+segmentPos, trial, (segLen - segmentPos)*sizeof(UChar)) != 0) { - return NULL; + UnicodeString trial; + nfd.normalize(temp, trial, status); + if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) { + return NULL; } - return getEquivalents2(buff, bufLen, status); + return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status); } U_NAMESPACE_END