-
-//--------------------------------------------------------------------------------------
-//
-//  confusableLookup()    This is the heart of the confusable skeleton generation
-//                        implementation.
-//
-//                        Given a source character, produce the corresponding
-//                        replacement character(s).
-//
-//      inChar     the code point to look up.
-//      tableMask  flag bit(s), matched against the top byte of each key, selecting
-//                 the table whose mapping is wanted.
-//      destBuf    receives the replacement as UTF-16 code units.  No capacity is
-//                 passed in, so the caller must supply a buffer that is large
-//                 enough for the longest replacement string in the data.
-//
-//      Returns the number of UChars written to destBuf.  A character that has
-//      no applicable key entry maps to itself.
-//
-//--------------------------------------------------------------------------------------
-int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UChar *destBuf) const {
-
-    int32_t *const keysStart = fSpoofData->fCFUKeys;
-    const int32_t  keyCount  = fSpoofData->fRawData->fCFUKeysSize;
-
-    // Binary search the spoof data key table for inChar.
-    // An empty table is skipped entirely; there is nothing to dereference.
-    // NOTE(review): the search masks keys with 0x1fffff while the adjacent-key
-    //   scans below mask with 0x00ffffff.  The two agree as long as bits 21-23
-    //   of every key are zero -- confirm against the data format.
-    int32_t *mid = NULL;
-    bool foundChar = false;
-    if (keyCount > 0) {
-        int32_t *low   = keysStart;
-        int32_t *limit = keysStart + keyCount;
-        do {
-            int32_t delta = ((int32_t)(limit-low))/2;
-            mid = low + delta;
-            const UChar32 midc = *mid & 0x1fffff;
-            if (inChar == midc) {
-                foundChar = true;
-                break;
-            } else if (inChar < midc) {
-                limit = mid;
-            } else {
-                low = mid;
-            }
-        } while (low < limit-1);
-        if (!foundChar) {
-            // The loop narrowed the range to a single candidate without testing it.
-            mid = low;
-            foundChar = (inChar == (*mid & 0x1fffff));
-        }
-    }
-
-    // Locate the key entry for inChar that pertains to the requested table.
-    int32_t keyFlags = 0;
-    bool foundKey = false;
-    if (foundChar) {
-        keyFlags = *mid & 0xff000000;
-        if (keyFlags & tableMask) {
-            foundKey = true;
-        } else if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) {
-            // We found the right key char, but the entry doesn't pertain to the
-            // table we need.  See if there is an adjacent key that does.
-            // Both scans are bounded by the ends of the key table, so a run of
-            // matching keys at either edge cannot carry the scan out of the array.
-            int32_t *const keysLimit = keysStart + keyCount;
-            int32_t *altMid = mid;
-            while (altMid > keysStart && (altMid[-1] & 0x00ffffff) == inChar) {
-                --altMid;
-                const int32_t altFlags = *altMid & 0xff000000;
-                if (altFlags & tableMask) {
-                    keyFlags = altFlags;
-                    foundKey = true;
-                    break;
-                }
-            }
-            if (!foundKey) {
-                for (altMid = mid+1;
-                     altMid < keysLimit && (*altMid & 0x00ffffff) == inChar;
-                     ++altMid) {
-                    const int32_t altFlags = *altMid & 0xff000000;
-                    if (altFlags & tableMask) {
-                        keyFlags = altFlags;
-                        foundKey = true;
-                        break;
-                    }
-                }
-            }
-            if (foundKey) {
-                mid = altMid;
-            }
-        }
-    }
-
-    if (!foundKey) {
-        // Either the char is not in the key table at all, or there is no key
-        // entry for this char & table.  The input char maps to itself.
-        int32_t i = 0;
-        U16_APPEND_UNSAFE(destBuf, i, inChar);
-        return i;
-    }
-
-    int32_t stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1;
-    int32_t keyTableIndex = (int32_t)(mid - keysStart);
-
-    // Value is either a UChar  (for strings of length 1) or
-    //                 an index into the string table (for longer strings)
-    uint16_t value = fSpoofData->fCFUValues[keyTableIndex];
-    if (stringLen == 1) {
-        destBuf[0] = value;
-        return 1;
-    }
-
-    // String length of 4 from the above lookup is used for all strings of length >= 4.
-    // For these, get the real length from the string lengths table,
-    //   which maps string table indexes to lengths.
-    //   All strings of the same length are stored contiguously in the string table.
-    //   'value' from the lookup above is the starting index for the desired string.
-
-    int32_t ix;
-    if (stringLen == 4) {
-        int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize;
-        for (ix = 0; ix < stringLengthsLimit; ix++) {
-            if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) {
-                stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength;
-                break;
-            }
-        }
-        U_ASSERT(ix < stringLengthsLimit);
-    }
-
-    // Copy the replacement string out of the string table.
-    U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen);
-    UChar *src = &fSpoofData->fCFUStrings[value];
-    for (ix=0; ix<stringLen; ix++) {
-        destBuf[ix] = src[ix];
-    }
-    return stringLen;
-}
-
-
-//---------------------------------------------------------------------------------------
-//
-//  wholeScriptCheck()
-//
-//      Precondition: the input text is already normalized to NFD.
-//
-//      Computes, into *result, the set of scripts each of which can represent
-//      something that is confusable with the input text.  The script of the
-//      input text itself is included, so input drawn from a single script
-//      always yields a set containing that script.
-//
-//---------------------------------------------------------------------------------------
-void SpoofImpl::wholeScriptCheck(
-        const UChar *text, int32_t length, ScriptSet *result, UErrorCode &status) const {
-
-    // Choose the trie according to whether the any-case check is enabled.
-    UTrie2 *trie = fSpoofData->fLowerCaseTrie;
-    if (fChecks & USPOOF_ANY_CASE) {
-        trie = fSpoofData->fAnyCaseTrie;
-    }
-
-    // Begin with every script and narrow the set one code point at a time.
-    result->setAll();
-    for (int32_t pos = 0; pos < length; ) {
-        UChar32 cp;
-        U16_NEXT(text, pos, length, cp);
-
-        const uint32_t setIndex = utrie2_get32(trie, cp);
-        switch (setIndex) {
-        case 0: {
-            // This char has no confusables in another script.
-            // TODO:  the data should carry sets holding just the single script
-            //        bit for the char's own script, which would remove this
-            //        special case.  Until then, take the script from the char
-            //        and intersect it with the running set.
-            UScriptCode cpScript = uscript_getScript(cp, &status);
-            U_ASSERT(cpScript > USCRIPT_INHERITED);
-            result->intersect(cpScript);
-            break;
-        }
-        case 1:
-            // Script is Common or Inherited; the set is left untouched.
-            break;
-        default:
-            result->intersect(fSpoofData->fScriptSets[setIndex]);
-            break;
-        }
-    }
-}
-
-