X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..3d1f044b704633e2e541231cd17ae9ecf9ad5c7a:/icuSources/i18n/translit.cpp diff --git a/icuSources/i18n/translit.cpp b/icuSources/i18n/translit.cpp index 84364796..aaaee8c9 100644 --- a/icuSources/i18n/translit.cpp +++ b/icuSources/i18n/translit.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** - * Copyright (C) 1999-2010, International Business Machines + * Copyright (C) 1999-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -8,7 +10,7 @@ ********************************************************************** */ -#include <typeinfo> // for 'typeid' to work +#include "utypeinfo.h" // for 'typeid' to work #include "unicode/utypes.h" @@ -24,6 +26,7 @@ #include "unicode/uniset.h" #include "unicode/uscript.h" #include "unicode/strenum.h" +#include "unicode/utf16.h" #include "cpdtrans.h" #include "nultrans.h" #include "rbt_data.h" @@ -88,20 +91,20 @@ static const char RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; /** * The mutex controlling access to registry object. */ -static UMTX registryMutex = 0; +static icu::UMutex *registryMutex() { + static icu::UMutex *m = STATIC_NEW(icu::UMutex); + return m; +} /** * System transliterator registry; non-null when initialized. */ -static U_NAMESPACE_QUALIFIER TransliteratorRegistry* registry = 0; +static icu::TransliteratorRegistry* registry = 0; // Macro to check/initialize the registry. ONLY USE WITHIN // MUTEX. Avoids function call when registry is initialized. 
#define HAVE_REGISTRY(status) (registry!=0 || initializeRegistry(status)) -// Empty string -static const UChar EMPTY[] = {0}; //"" - U_NAMESPACE_BEGIN UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator) @@ -371,7 +374,7 @@ void Transliterator::_transliterate(Replaceable& text, } if (index.limit > 0 && - UTF_IS_LEAD(text.charAt(index.limit - 1))) { + U16_IS_LEAD(text.charAt(index.limit - 1))) { // Oops, there is a dangling lead surrogate in the buffer. // This will break most transliterators, since they will // assume it is part of a pair. Don't transliterate until @@ -410,7 +413,7 @@ void Transliterator::_transliterate(Replaceable& text, int32_t n = getMaximumContextLength(); while (newCS > originalStart && n-- > 0) { --newCS; - newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1; + newCS -= U16_LENGTH(text.char32At(newCS)) - 1; } index.contextStart = uprv_max(newCS, originalStart); #endif @@ -481,14 +484,14 @@ void Transliterator::filteredTransliterate(Replaceable& text, UChar32 c; while (index.start < globalLimit && !filter->contains(c=text.char32At(index.start))) { - index.start += UTF_CHAR_LENGTH(c); + index.start += U16_LENGTH(c); } // Find the end of this run of unfiltered chars index.limit = index.start; while (index.limit < globalLimit && filter->contains(c=text.char32At(index.limit))) { - index.limit += UTF_CHAR_LENGTH(c); + index.limit += U16_LENGTH(c); } } @@ -571,8 +574,7 @@ void Transliterator::filteredTransliterate(Replaceable& text, // transliterations and commit complete transliterations. 
for (;;) { // Length of additional code point, either one or two - int32_t charLength = - UTF_CHAR_LENGTH(text.char32At(passLimit)); + int32_t charLength = U16_LENGTH(text.char32At(passLimit)); passLimit += charLength; if (passLimit > runLimit) { break; @@ -598,7 +600,7 @@ void Transliterator::filteredTransliterate(Replaceable& text, int32_t rs = rollbackStart + delta - (index.limit - passStart); // Delete the partially transliterated text - text.handleReplaceBetween(passStart, index.limit, EMPTY); + text.handleReplaceBetween(passStart, index.limit, UnicodeString()); // Copy the rollback text back text.copy(rs, rs + uncommittedLength, passStart); @@ -636,7 +638,7 @@ void Transliterator::filteredTransliterate(Replaceable& text, globalLimit += totalDelta; // Delete the rollback copy - text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY); + text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, UnicodeString()); // Move start past committed text index.start = passStart; @@ -979,11 +981,11 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id, TransliteratorAlias* alias = 0; Transliterator* t = 0; - umtx_lock(&registryMutex); + umtx_lock(registryMutex()); if (HAVE_REGISTRY(ec)) { t = registry->get(id, alias, ec); } - umtx_unlock(&registryMutex); + umtx_unlock(registryMutex()); if (U_FAILURE(ec)) { delete t; @@ -1011,11 +1013,11 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id, alias = 0; // Step 2. reget - umtx_lock(&registryMutex); + umtx_lock(registryMutex()); if (HAVE_REGISTRY(ec)) { t = registry->reget(id, parser, alias, ec); } - umtx_unlock(&registryMutex); + umtx_unlock(registryMutex()); // Step 3. Loop back around! 
} else { @@ -1110,12 +1112,13 @@ Transliterator::createFromRules(const UnicodeString& ID, } if (!parser.dataVector.isEmpty()) { TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); - RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++), + // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? + RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), data, TRUE); // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer. if (temprbt == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return t; + status = U_MEMORY_ALLOCATION_ERROR; + return t; } transliterators.addElement(temprbt, status); } @@ -1146,7 +1149,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource, if (!ICU_Utility::escapeUnprintable(rulesSource, c)) { rulesSource.append(c); } - i += UTF_CHAR_LENGTH(c); + i += U16_LENGTH(c); } } else { rulesSource = getID(); @@ -1212,7 +1215,7 @@ UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const { void U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id, Transliterator::Factory factory, Transliterator::Token context) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { _registerFactory(id, factory, context); @@ -1251,7 +1254,7 @@ void Transliterator::_registerSpecialInverse(const UnicodeString& target, * @see #unregister */ void U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { _registerInstance(adoptedPrototype); @@ -1265,7 +1268,7 @@ void Transliterator::_registerInstance(Transliterator* adoptedPrototype) { void U_EXPORT2 
Transliterator::registerAlias(const UnicodeString& aliasID, const UnicodeString& realID) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { _registerAlias(aliasID, realID); @@ -1281,13 +1284,13 @@ void Transliterator::_registerAlias(const UnicodeString& aliasID, /** * Unregisters a transliterator or class. This may be either * a system transliterator or a user transliterator or class. - * + * * @param ID the ID of the transliterator or class * @see #registerInstance */ void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { registry->remove(ID); @@ -1302,7 +1305,7 @@ void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) { */ int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) { int32_t retVal = 0; - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { retVal = registry->countAvailableIDs(); @@ -1318,12 +1321,12 @@ int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) { */ const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) { const UnicodeString* result = NULL; - umtx_lock(&registryMutex); + umtx_lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { result = &registry->getAvailableID(index); } - umtx_unlock(&registryMutex); + umtx_unlock(registryMutex()); U_ASSERT(result != NULL); // fail if no registry return *result; } @@ -1331,11 +1334,11 @@ const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) { StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) { if (U_FAILURE(ec)) return NULL; StringEnumeration* result = NULL; - umtx_lock(&registryMutex); + umtx_lock(registryMutex()); if (HAVE_REGISTRY(ec)) { result = registry->getAvailableIDs(); } - umtx_unlock(&registryMutex); + umtx_unlock(registryMutex()); if (result == NULL) 
{ ec = U_INTERNAL_TRANSLITERATOR_ERROR; } @@ -1343,14 +1346,14 @@ StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) { } int32_t U_EXPORT2 Transliterator::countAvailableSources(void) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; return HAVE_REGISTRY(ec) ? _countAvailableSources() : 0; } UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index, UnicodeString& result) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { _getAvailableSource(index, result); @@ -1359,7 +1362,7 @@ UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index, } int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0; } @@ -1367,7 +1370,7 @@ int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& sou UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index, const UnicodeString& source, UnicodeString& result) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { _getAvailableTarget(index, source, result); @@ -1377,7 +1380,7 @@ UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index, int32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source, const UnicodeString& target) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; return HAVE_REGISTRY(ec) ? 
_countAvailableVariants(source, target) : 0; } @@ -1386,7 +1389,7 @@ UnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index, const UnicodeString& source, const UnicodeString& target, UnicodeString& result) { - Mutex lock(&registryMutex); + Mutex lock(registryMutex()); UErrorCode ec = U_ZERO_ERROR; if (HAVE_REGISTRY(ec)) { _getAvailableVariant(index, source, target, result); @@ -1485,13 +1488,13 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { * is the ID of the system transliterator being defined. These * are public IDs enumerated by Transliterator.getAvailableIDs(), * unless the second field is "internal". - * + * is a ResourceReader resource name. Currently these refer * to file names under com/ibm/text/resources. This string is passed * directly to ResourceReader, together with . - * + * is either "FORWARD" or "REVERSE". - * + * is a string to be passed directly to * Transliterator.getInstance(). The returned Transliterator object * then has its ID changed to and is returned. 
@@ -1500,52 +1503,53 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { */ //static const char translit_index[] = "translit_index"; - UResourceBundle *bundle, *transIDs, *colBund; - bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); - transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); - - int32_t row, maxRows; + UResourceBundle *bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); + UResourceBundle *transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); if (U_SUCCESS(status)) { - maxRows = ures_getSize(transIDs); + UResourceBundle *colBund = NULL; + UResourceBundle* res = NULL; + int32_t row, maxRows = ures_getSize(transIDs); for (row = 0; row < maxRows; row++) { - colBund = ures_getByIndex(transIDs, row, 0, &status); - if (U_SUCCESS(status)) { - UnicodeString id(ures_getKey(colBund), -1, US_INV); - UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); - const char* typeStr = ures_getKey(res); - UChar type; - u_charsToUChars(typeStr, &type, 1); - - if (U_SUCCESS(status)) { - int32_t len = 0; - const UChar *resString; - switch (type) { - case 0x66: // 'f' - case 0x69: // 'i' - // 'file' or 'internal'; - // row[2]=resource, row[3]=direction - { - - resString = ures_getStringByKey(res, "resource", &len, &status); - UBool visible = (type == 0x0066 /*f*/); - UTransDirection dir = - (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == - 0x0046 /*F*/) ? 
- UTRANS_FORWARD : UTRANS_REVERSE; - registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); - } - break; - case 0x61: // 'a' - // 'alias'; row[2]=createInstance argument - resString = ures_getString(res, &len, &status); - registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); - break; + colBund = ures_getByIndex(transIDs, row, colBund, &status); + if (U_FAILURE(status)) { + break; + } + const char *tridKey = ures_getKey(colBund); + if (tridKey == NULL || uprv_strstr(tridKey, "-t-") != NULL) { + continue; // Apple version should not get any of these, eliminated the root.txt entries + } + res = ures_getNextResource(colBund, res, &status); + if (U_FAILURE(status)) { + break; + } + UnicodeString trID(tridKey, -1, US_INV); + const char* typeStr = ures_getKey(res); + int32_t len = 0, dlen = 0; + UBool visible = FALSE; + const UChar *resString; + switch (typeStr[0]) { + case 'f': // "file" + visible = TRUE; + // FALLTHROUGH + case 'i': // "internal" => visible = FALSE + // child resources are resource and direction + { + resString = ures_getStringByKey(res, "resource", &len, &status); + const UChar* dirString = ures_getStringByKey(res, "direction", &dlen, &status); + UTransDirection dir = (dlen <= 0 || dirString[0] == 0x0046 /*F*/)? UTRANS_FORWARD : UTRANS_REVERSE; + registry->put(trID, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); } - } - ures_close(res); + break; + case 'a': // "alias", string argument is alias + resString = ures_getString(res, &len, &status); + registry->put(trID, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); + break; + default: // do nothing + break; } - ures_close(colBund); } + ures_close(res); + ures_close(colBund); } ures_close(transIDs); @@ -1554,7 +1558,7 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { // Manually add prototypes that the system knows about to the // cache. This is how new non-rule-based transliterators are // added to the system. 
- + // This is to allow for null pointer check NullTransliterator* tempNullTranslit = new NullTransliterator(); LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator(); @@ -1568,7 +1572,7 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { #endif // Check for null pointers if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL || - tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL || + tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL || #if !UCONFIG_NO_BREAK_ITERATION tempBreakTranslit == NULL || #endif @@ -1621,7 +1625,7 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { U_NAMESPACE_END -// Defined in ucln_in.h: +// Defined in transreg.h: /** * Release all static memory held by transliterator. This will @@ -1635,7 +1639,6 @@ U_CFUNC UBool utrans_transliterator_cleanup(void) { delete registry; registry = NULL; } - umtx_destroy(&registryMutex); return TRUE; }