X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..4f1e1a09ce4daed860e35d359ce2fceccb0764e8:/icuSources/i18n/translit.cpp diff --git a/icuSources/i18n/translit.cpp b/icuSources/i18n/translit.cpp index a39d6e91..8ff0c9f5 100644 --- a/icuSources/i18n/translit.cpp +++ b/icuSources/i18n/translit.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** - * Copyright (C) 1999-2008, International Business Machines + * Copyright (C) 1999-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -8,6 +10,8 @@ ********************************************************************** */ +#include "utypeinfo.h" // for 'typeid' to work + #include "unicode/utypes.h" #if !UCONFIG_NO_TRANSLITERATION @@ -22,6 +26,7 @@ #include "unicode/uniset.h" #include "unicode/uscript.h" #include "unicode/strenum.h" +#include "unicode/utf16.h" #include "cpdtrans.h" #include "nultrans.h" #include "rbt_data.h" @@ -86,20 +91,17 @@ static const char RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; /** * The mutex controlling access to registry object. */ -static UMTX registryMutex = 0; +static UMutex registryMutex = U_MUTEX_INITIALIZER; /** * System transliterator registry; non-null when initialized. */ -static U_NAMESPACE_QUALIFIER TransliteratorRegistry* registry = 0; +static icu::TransliteratorRegistry* registry = 0; // Macro to check/initialize the registry. ONLY USE WITHIN // MUTEX. Avoids function call when registry is initialized. 
#define HAVE_REGISTRY(status) (registry!=0 || initializeRegistry(status)) -// Empty string -static const UChar EMPTY[] = {0}; //"" - U_NAMESPACE_BEGIN UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator) @@ -369,7 +371,7 @@ void Transliterator::_transliterate(Replaceable& text, } if (index.limit > 0 && - UTF_IS_LEAD(text.charAt(index.limit - 1))) { + U16_IS_LEAD(text.charAt(index.limit - 1))) { // Oops, there is a dangling lead surrogate in the buffer. // This will break most transliterators, since they will // assume it is part of a pair. Don't transliterate until @@ -408,7 +410,7 @@ void Transliterator::_transliterate(Replaceable& text, int32_t n = getMaximumContextLength(); while (newCS > originalStart && n-- > 0) { --newCS; - newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1; + newCS -= U16_LENGTH(text.char32At(newCS)) - 1; } index.contextStart = uprv_max(newCS, originalStart); #endif @@ -479,14 +481,14 @@ void Transliterator::filteredTransliterate(Replaceable& text, UChar32 c; while (index.start < globalLimit && !filter->contains(c=text.char32At(index.start))) { - index.start += UTF_CHAR_LENGTH(c); + index.start += U16_LENGTH(c); } // Find the end of this run of unfiltered chars index.limit = index.start; while (index.limit < globalLimit && filter->contains(c=text.char32At(index.limit))) { - index.limit += UTF_CHAR_LENGTH(c); + index.limit += U16_LENGTH(c); } } @@ -569,8 +571,7 @@ void Transliterator::filteredTransliterate(Replaceable& text, // transliterations and commit complete transliterations. 
for (;;) { // Length of additional code point, either one or two - int32_t charLength = - UTF_CHAR_LENGTH(text.char32At(passLimit)); + int32_t charLength = U16_LENGTH(text.char32At(passLimit)); passLimit += charLength; if (passLimit > runLimit) { break; @@ -596,7 +597,7 @@ void Transliterator::filteredTransliterate(Replaceable& text, int32_t rs = rollbackStart + delta - (index.limit - passStart); // Delete the partially transliterated text - text.handleReplaceBetween(passStart, index.limit, EMPTY); + text.handleReplaceBetween(passStart, index.limit, UnicodeString()); // Copy the rollback text back text.copy(rs, rs + uncommittedLength, passStart); @@ -634,7 +635,7 @@ void Transliterator::filteredTransliterate(Replaceable& text, globalLimit += totalDelta; // Delete the rollback copy - text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY); + text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, UnicodeString()); // Move start past committed text index.start = passStart; @@ -977,7 +978,6 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id, TransliteratorAlias* alias = 0; Transliterator* t = 0; - umtx_init(&registryMutex); umtx_lock(&registryMutex); if (HAVE_REGISTRY(ec)) { t = registry->get(id, alias, ec); @@ -1101,7 +1101,7 @@ Transliterator::createFromRules(const UnicodeString& ID, UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); if (!idBlock->isEmpty()) { Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status); - if (temp != NULL && temp->getDynamicClassID() != NullTransliterator::getStaticClassID()) + if (temp != NULL && typeid(*temp) != typeid(NullTransliterator)) transliterators.addElement(temp, status); else delete temp; @@ -1109,12 +1109,13 @@ Transliterator::createFromRules(const UnicodeString& ID, } if (!parser.dataVector.isEmpty()) { TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); - 
RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++), + // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? + RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), data, TRUE); // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer. if (temprbt == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return t; + status = U_MEMORY_ALLOCATION_ERROR; + return t; } transliterators.addElement(temprbt, status); } @@ -1145,7 +1146,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource, if (!ICU_Utility::escapeUnprintable(rulesSource, c)) { rulesSource.append(c); } - i += UTF_CHAR_LENGTH(c); + i += U16_LENGTH(c); } } else { rulesSource = getID(); @@ -1157,18 +1158,15 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource, } int32_t Transliterator::countElements() const { - return (this->getDynamicClassID() == - CompoundTransliterator::getStaticClassID()) ? - ((const CompoundTransliterator*) this)->getCount() : 0; + const CompoundTransliterator* ct = dynamic_cast<const CompoundTransliterator*>(this); + return ct != NULL ? ct->getCount() : 0; } const Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const { if (U_FAILURE(ec)) { return *this; } - const CompoundTransliterator* cpd = - (this->getDynamicClassID() == CompoundTransliterator::getStaticClassID()) ? - (const CompoundTransliterator*) this : 0; + const CompoundTransliterator* cpd = dynamic_cast<const CompoundTransliterator*>(this); int32_t n = (cpd == NULL) ? 
1 : cpd->getCount(); if (index < 0 || index >= n) { ec = U_INDEX_OUTOFBOUNDS_ERROR; @@ -1181,13 +1179,11 @@ const Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const { handleGetSourceSet(result); if (filter != NULL) { - UnicodeSet* filterSet; + UnicodeSet* filterSet = dynamic_cast<UnicodeSet*>(filter); UBool deleteFilterSet = FALSE; // Most, but not all filters will be UnicodeSets. Optimize for // the high-runner case. - if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) { - filterSet = (UnicodeSet*) filter; - } else { + if (filterSet == NULL) { filterSet = new UnicodeSet(); // Check null pointer if (filterSet == NULL) { @@ -1216,7 +1212,6 @@ UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const { void U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id, Transliterator::Factory factory, Transliterator::Token context) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1256,7 +1251,6 @@ void Transliterator::_registerSpecialInverse(const UnicodeString& target, * @see #unregister */ void U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1271,7 +1265,6 @@ void Transliterator::_registerInstance(Transliterator* adoptedPrototype) { void U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID, const UnicodeString& realID) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1288,13 +1281,12 @@ void Transliterator::_registerAlias(const UnicodeString& aliasID, /** * Unregisters a transliterator or class. This may be either * a system transliterator or a user transliterator or class. 
- * + * * @param ID the ID of the transliterator or class * @see #registerInstance */ void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1310,7 +1302,6 @@ void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) { */ int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) { int32_t retVal = 0; - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1327,7 +1318,6 @@ int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) { */ const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) { const UnicodeString* result = NULL; - umtx_init(&registryMutex); umtx_lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1341,7 +1331,6 @@ const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) { StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) { if (U_FAILURE(ec)) return NULL; StringEnumeration* result = NULL; - umtx_init(&registryMutex); umtx_lock(&registryMutex); if (HAVE_REGISTRY(ec)) { result = registry->getAvailableIDs(); @@ -1354,7 +1343,6 @@ StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) { } int32_t U_EXPORT2 Transliterator::countAvailableSources(void) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; return HAVE_REGISTRY(ec) ? 
_countAvailableSources() : 0; @@ -1362,7 +1350,6 @@ int32_t U_EXPORT2 Transliterator::countAvailableSources(void) { UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index, UnicodeString& result) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1372,7 +1359,6 @@ UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index, } int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0; @@ -1381,7 +1367,6 @@ int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& sou UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index, const UnicodeString& source, UnicodeString& result) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1392,7 +1377,6 @@ UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index, int32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source, const UnicodeString& target) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; return HAVE_REGISTRY(ec) ? _countAvailableVariants(source, target) : 0; @@ -1402,7 +1386,6 @@ UnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index, const UnicodeString& source, const UnicodeString& target, UnicodeString& result) { - umtx_init(&registryMutex); Mutex lock(&registryMutex); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { @@ -1464,7 +1447,7 @@ UChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const { * and return TRUE. If the registry cannot be initialized, return * FALSE (rare). * - * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entirely + * IMPORTANT: Upon entry, registryMutex must be LOCKED. 
The entire * initialization is done with the lock held. There is NO REASON to * unlock, since no other thread that is waiting on the registryMutex * cannot itself proceed until the registry is initialized. @@ -1502,13 +1485,13 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { * is the ID of the system transliterator being defined. These * are public IDs enumerated by Transliterator.getAvailableIDs(), * unless the second field is "internal". - * + * * is a ResourceReader resource name. Currently these refer * to file names under com/ibm/text/resources. This string is passed * directly to ResourceReader, together with . - * + * * is either "FORWARD" or "REVERSE". - * + * * is a string to be passed directly to * Transliterator.getInstance(). The returned Transliterator object * then has its ID changed to and is returned. @@ -1517,52 +1500,53 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { */ //static const char translit_index[] = "translit_index"; - UResourceBundle *bundle, *transIDs, *colBund; - bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); - transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); - - int32_t row, maxRows; + UResourceBundle *bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); + UResourceBundle *transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); if (U_SUCCESS(status)) { - maxRows = ures_getSize(transIDs); + UResourceBundle *colBund = NULL; + UResourceBundle* res = NULL; + int32_t row, maxRows = ures_getSize(transIDs); for (row = 0; row < maxRows; row++) { - colBund = ures_getByIndex(transIDs, row, 0, &status); - if (U_SUCCESS(status)) { - UnicodeString id(ures_getKey(colBund), -1, US_INV); - UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); - const char* typeStr = ures_getKey(res); - UChar type; - u_charsToUChars(typeStr, &type, 1); - - if (U_SUCCESS(status)) { - int32_t len = 0; - const UChar *resString; - switch 
(type) { - case 0x66: // 'f' - case 0x69: // 'i' - // 'file' or 'internal'; - // row[2]=resource, row[3]=direction - { - - resString = ures_getStringByKey(res, "resource", &len, &status); - UBool visible = (type == 0x0066 /*f*/); - UTransDirection dir = - (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == - 0x0046 /*F*/) ? - UTRANS_FORWARD : UTRANS_REVERSE; - registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); - } - break; - case 0x61: // 'a' - // 'alias'; row[2]=createInstance argument - resString = ures_getString(res, &len, &status); - registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); - break; + colBund = ures_getByIndex(transIDs, row, colBund, &status); + if (U_FAILURE(status)) { + break; + } + const char *tridKey = ures_getKey(colBund); + if (tridKey == NULL || uprv_strstr(tridKey, "-t-") != NULL) { + continue; // Apple version should not get any of these, eliminated the root.txt entries + } + res = ures_getNextResource(colBund, res, &status); + if (U_FAILURE(status)) { + break; + } + UnicodeString trID(tridKey, -1, US_INV); + const char* typeStr = ures_getKey(res); + int32_t len = 0, dlen = 0; + UBool visible = FALSE; + const UChar *resString; + switch (typeStr[0]) { + case 'f': // "file" + visible = TRUE; + // FALLTHROUGH + case 'i': // "internal" => visible = FALSE + // child resources are resource and direction + { + resString = ures_getStringByKey(res, "resource", &len, &status); + const UChar* dirString = ures_getStringByKey(res, "direction", &dlen, &status); + UTransDirection dir = (dlen <= 0 || dirString[0] == 0x0046 /*F*/)? 
UTRANS_FORWARD : UTRANS_REVERSE; + registry->put(trID, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); } - } - ures_close(res); + break; + case 'a': // "alias", string argument is alias + resString = ures_getString(res, &len, &status); + registry->put(trID, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); + break; + default: // do nothing + break; } - ures_close(colBund); } + ures_close(res); + ures_close(colBund); } ures_close(transIDs); @@ -1571,7 +1555,7 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { // Manually add prototypes that the system knows about to the // cache. This is how new non-rule-based transliterators are // added to the system. - + // This is to allow for null pointer check NullTransliterator* tempNullTranslit = new NullTransliterator(); LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator(); @@ -1585,7 +1569,7 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { #endif // Check for null pointers if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL || - tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL || + tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL || #if !UCONFIG_NO_BREAK_ITERATION tempBreakTranslit == NULL || #endif @@ -1631,28 +1615,27 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"), UNICODE_STRING_SIMPLE("Lower"), FALSE); - ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, transliterator_cleanup); + ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup); return TRUE; } U_NAMESPACE_END -// Defined in ucln_in.h: +// Defined in transreg.h: /** * Release all static memory held by transliterator. This will * necessarily invalidate any rule-based transliterators held by the * user, because RBTs hold pointers to common data objects. 
*/ -U_CFUNC UBool transliterator_cleanup(void) { +U_CFUNC UBool utrans_transliterator_cleanup(void) { U_NAMESPACE_USE TransliteratorIDParser::cleanup(); if (registry) { delete registry; registry = NULL; } - umtx_destroy(&registryMutex); return TRUE; }