+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
-**********************************************************************
-* Copyright (C) 1999-2004, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 11/17/99 aliu Creation.
-**********************************************************************
-*/
+ **********************************************************************
+ * Copyright (C) 1999-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * Date Name Description
+ * 11/17/99 aliu Creation.
+ **********************************************************************
+ */
+
+#include "utypeinfo.h" // for 'typeid' to work
#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/uscript.h"
#include "unicode/strenum.h"
+#include "unicode/utf16.h"
#include "cpdtrans.h"
#include "nultrans.h"
#include "rbt_data.h"
#include "tolowtrn.h"
#include "toupptrn.h"
#include "uni2name.h"
+#include "brktrans.h"
#include "esctrn.h"
#include "unesctrn.h"
#include "tridpars.h"
#include "uassert.h"
#include "cmemory.h"
#include "cstring.h"
+#include "uinvchar.h"
static const UChar TARGET_SEP = 0x002D; /*-*/
static const UChar ID_DELIM = 0x003B; /*;*/
/**
* The mutex controlling access to registry object.
*/
-static UMTX registryMutex = 0;
+static icu::UMutex *registryMutex() {
+ // Function-local static: the initializer runs once, on the first call,
+ // and every later call hands back the same pointer.
+ static icu::UMutex *mutex = STATIC_NEW(icu::UMutex);
+ return mutex;
+}
/**
* System transliterator registry; non-null when initialized.
*/
-static TransliteratorRegistry* registry = 0;
+static icu::TransliteratorRegistry* registry = 0;
// Macro to check/initialize the registry. ONLY USE WITHIN
// MUTEX. Avoids function call when registry is initialized.
-#define HAVE_REGISTRY (registry!=0 || initializeRegistry())
-
-// Empty string
-static const UChar EMPTY[] = {0}; //""
+#define HAVE_REGISTRY(status) (registry!=0 || initializeRegistry(status))
U_NAMESPACE_BEGIN
* Return TRUE if the given UTransPosition is valid for text of
* the given length.
*/
-inline UBool positionIsValid(UTransPosition& index, int32_t len) {
+static inline UBool positionIsValid(UTransPosition& index, int32_t len) {
return !(index.contextStart < 0 ||
index.start < index.contextStart ||
index.limit < index.start ||
Transliterator::Transliterator(const UnicodeString& theID,
UnicodeFilter* adoptedFilter) :
UObject(), ID(theID), filter(adoptedFilter),
- maximumContextLength(0) {
-
- // NUL-terminate the ID string
- ID.getTerminatedBuffer();
+ maximumContextLength(0)
+{
+ // NUL-terminate the ID string, which is a non-aliased copy.
+ ID.append((UChar)0); // step 1: append a single NUL code unit to the copied ID
+ ID.truncate(ID.length()-1); // step 2: cut the length back by one so the NUL is not part of the ID's contents
}
/**
* Destructor.
*/
Transliterator::~Transliterator() {
- delete filter;
+ if (filter) { // defensive only: a delete-expression on a null pointer has no effect
+ delete filter; // the filter stored by the constructor is released here
+ }
}
/**
*/
Transliterator::Transliterator(const Transliterator& other) :
UObject(other), ID(other.ID), filter(0),
- maximumContextLength(other.maximumContextLength) {
-
- // NUL-terminate the ID string
- ID.getTerminatedBuffer();
+ maximumContextLength(other.maximumContextLength)
+{
+ // NUL-terminate the ID string, which is a non-aliased copy.
+ ID.append((UChar)0);
+ ID.truncate(ID.length()-1);
if (other.filter != 0) {
// We own the filter, so we must have our own copy
}
}
+Transliterator* Transliterator::clone() const {
+ return NULL; // this implementation never produces a copy. NOTE(review): presumably subclasses supply the real clone() - confirm
+}
+
/**
* Assignment operator.
*/
}
if (index.limit > 0 &&
- UTF_IS_LEAD(text.charAt(index.limit - 1))) {
+ U16_IS_LEAD(text.charAt(index.limit - 1))) {
// Oops, there is a dangling lead surrogate in the buffer.
// This will break most transliterators, since they will
// assume it is part of a pair. Don't transliterate until
int32_t n = getMaximumContextLength();
while (newCS > originalStart && n-- > 0) {
--newCS;
- newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;
+ newCS -= U16_LENGTH(text.char32At(newCS)) - 1;
}
index.contextStart = uprv_max(newCS, originalStart);
#endif
UChar32 c;
while (index.start < globalLimit &&
!filter->contains(c=text.char32At(index.start))) {
- index.start += UTF_CHAR_LENGTH(c);
+ index.start += U16_LENGTH(c);
}
// Find the end of this run of unfiltered chars
index.limit = index.start;
while (index.limit < globalLimit &&
filter->contains(c=text.char32At(index.limit))) {
- index.limit += UTF_CHAR_LENGTH(c);
+ index.limit += U16_LENGTH(c);
}
}
// transliterations and commit complete transliterations.
for (;;) {
// Length of additional code point, either one or two
- int32_t charLength =
- UTF_CHAR_LENGTH(text.char32At(passLimit));
+ int32_t charLength = U16_LENGTH(text.char32At(passLimit));
passLimit += charLength;
if (passLimit > runLimit) {
break;
int32_t rs = rollbackStart + delta - (index.limit - passStart);
// Delete the partially transliterated text
- text.handleReplaceBetween(passStart, index.limit, EMPTY);
+ text.handleReplaceBetween(passStart, index.limit, UnicodeString());
// Copy the rollback text back
text.copy(rs, rs + uncommittedLength, passStart);
globalLimit += totalDelta;
// Delete the rollback copy
- text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY);
+ text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, UnicodeString());
// Move start past committed text
index.start = passStart;
ID.append(TARGET_SEP).append(target).append(variant);
// build the char* key
- char key[200];
- uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
- int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
- ID.extract(0, (int32_t)(sizeof(key)-length), key+length, "");
+ if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {
+ char key[200];
+ uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
+ int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
+ ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV);
- // Try to retrieve a UnicodeString from the bundle.
- UnicodeString resString = bundle.getStringEx(key, status);
+ // Try to retrieve a UnicodeString from the bundle.
+ UnicodeString resString = bundle.getStringEx(key, status);
- if (U_SUCCESS(status) && resString.length() != 0) {
- return result = resString; // [sic] assign & return
- }
+ if (U_SUCCESS(status) && resString.length() != 0) {
+ return result = resString; // [sic] assign & return
+ }
#if !UCONFIG_NO_FORMATTING
- // We have failed to get a name from the locale data. This is
- // typical, since most transliterators will not have localized
- // name data. The next step is to retrieve the MessageFormat
- // pattern from the locale data and to use it to synthesize the
- // name from the ID.
-
- status = U_ZERO_ERROR;
- resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);
-
- if (U_SUCCESS(status) && resString.length() != 0) {
- MessageFormat msg(resString, inLocale, status);
- // Suspend checking status until later...
-
- // We pass either 2 or 3 Formattable objects to msg.
- Formattable args[3];
- int32_t nargs;
- args[0].setLong(2); // # of args to follow
- args[1].setString(source);
- args[2].setString(target);
- nargs = 3;
-
- // Use display names for the scripts, if they exist
- UnicodeString s;
- length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
- for (int j=1; j<=2; ++j) {
- status = U_ZERO_ERROR;
- uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
- args[j].getString(s);
- s.extract(0, sizeof(key)-length-1, key+length, "");
+ // We have failed to get a name from the locale data. This is
+ // typical, since most transliterators will not have localized
+ // name data. The next step is to retrieve the MessageFormat
+ // pattern from the locale data and to use it to synthesize the
+ // name from the ID.
- resString = bundle.getStringEx(key, status);
+ status = U_ZERO_ERROR;
+ resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);
+
+ if (U_SUCCESS(status) && resString.length() != 0) {
+ MessageFormat msg(resString, inLocale, status);
+ // Suspend checking status until later...
+
+ // We pass either 2 or 3 Formattable objects to msg.
+ Formattable args[3];
+ int32_t nargs;
+ args[0].setLong(2); // # of args to follow
+ args[1].setString(source);
+ args[2].setString(target);
+ nargs = 3;
+
+ // Use display names for the scripts, if they exist
+ UnicodeString s;
+ length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
+ for (int j=1; j<=2; ++j) {
+ status = U_ZERO_ERROR;
+ uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
+ args[j].getString(s);
+ if (uprv_isInvariantUString(s.getBuffer(), s.length())) {
+ s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV);
+
+ resString = bundle.getStringEx(key, status);
+
+ if (U_SUCCESS(status)) {
+ args[j] = resString;
+ }
+ }
+ }
+ status = U_ZERO_ERROR;
+ FieldPosition pos; // ignored by msg
+ msg.format(args, nargs, result, pos, status);
if (U_SUCCESS(status)) {
- args[j] = resString;
+ result.append(variant);
+ return result;
}
}
-
- status = U_ZERO_ERROR;
- FieldPosition pos; // ignored by msg
- msg.format(args, nargs, result, pos, status);
- if (U_SUCCESS(status)) {
- result.append(variant);
- return result;
- }
- }
#endif
+ }
// We should not reach this point unless there is something
// wrong with the build or the RB_DISPLAY_NAME_PATTERN has
return NULL;
}
- TransliteratorIDParser::instantiateList(list, NULL, -1, status);
+ TransliteratorIDParser::instantiateList(list, status);
if (U_FAILURE(status)) {
return NULL;
}
U_ASSERT(list.size() > 0);
Transliterator* t = NULL;
- switch (list.size()) {
- case 1:
- t = (Transliterator*) list.elementAt(0);
- break;
- default:
+
+ if (list.size() > 1 || canonID.indexOf(ID_DELIM) >= 0) {
+ // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only
+ // has one child transliterator. This is so that toRules() will return the right thing
+ // (without any inactive ID), but our main ID still comes out correct. That is, if we
+ // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;"
+ // even though the ID is "(Lower);Latin-Greek;".
t = new CompoundTransliterator(list, parseError, status);
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- if (U_FAILURE(status)) {
- delete t;
- return NULL;
+ }
+ else {
+ t = (Transliterator*)list.elementAt(0);
+ }
+ // Check null pointer
+ if (t != NULL) {
+ t->setID(canonID);
+ if (globalFilter != NULL) {
+ t->adoptFilter(globalFilter);
}
- break;
}
- t->setID(canonID);
- if (globalFilter != NULL) {
- t->adoptFilter(globalFilter);
+ else if (U_SUCCESS(status)) {
+ status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
}
TransliteratorAlias* alias = 0;
Transliterator* t = 0;
- umtx_init(&registryMutex);
- umtx_lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ umtx_lock(registryMutex());
+ if (HAVE_REGISTRY(ec)) {
t = registry->get(id, alias, ec);
}
- umtx_unlock(&registryMutex);
+ umtx_unlock(registryMutex());
if (U_FAILURE(ec)) {
delete t;
// Other aliases are handled with TransliteratorAlias::create().
if (alias->isRuleBased()) {
// Step 1. parse
- TransliteratorParser parser;
+ TransliteratorParser parser(ec);
alias->parse(parser, pe, ec);
delete alias;
alias = 0;
// Step 2. reget
- umtx_lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ umtx_lock(registryMutex());
+ if (HAVE_REGISTRY(ec)) {
t = registry->reget(id, parser, alias, ec);
}
- umtx_unlock(&registryMutex);
+ umtx_unlock(registryMutex());
// Step 3. Loop back around!
} else {
{
Transliterator* t = NULL;
- TransliteratorParser parser;
+ TransliteratorParser parser(status);
parser.parse(rules, dir, parseError, status);
if (U_FAILURE(status)) {
}
// NOTE: The logic here matches that in TransliteratorRegistry.
- if (parser.idBlock.length() == 0) {
- if (parser.data == NULL) {
- // No idBlock, no data -- this is just an
- // alias for Null
- t = new NullTransliterator();
- } else {
- // No idBlock, data != 0 -- this is an
- // ordinary RBT_DATA.
- t = new RuleBasedTransliterator(ID, parser.orphanData(), TRUE); // TRUE == adopt data object
+ if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) {
+ t = new NullTransliterator();
+ }
+ else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) {
+ t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE);
+ }
+ else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) {
+ // idBlock, no data -- this is an alias. The ID has
+ // been munged from reverse into forward mode, if
+ // necessary, so instantiate the ID in the forward
+ // direction.
+ if (parser.compoundFilter != NULL) {
+ UnicodeString filterPattern;
+ parser.compoundFilter->toPattern(filterPattern, FALSE);
+ t = createInstance(filterPattern + UnicodeString(ID_DELIM)
+ + *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
}
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
+ else
+ t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
+
+
+ if (t != NULL) {
+ t->setID(ID);
}
- } else {
- if (parser.data == NULL) {
- // idBlock, no data -- this is an alias. The ID has
- // been munged from reverse into forward mode, if
- // necessary, so instantiate the ID in the forward
- // direction.
- t = createInstance(parser.idBlock, UTRANS_FORWARD, parseError, status);
- if (t != NULL) {
- t->setID(ID);
- }
- } else {
- // idBlock and data -- this is a compound
- // RBT
- UnicodeString id((UChar)0x005F); // '_'
- t = new RuleBasedTransliterator(id, parser.orphanData(), TRUE); // TRUE == adopt data object
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- t = new CompoundTransliterator(ID, parser.idBlock, parser.idSplitPoint,
- t, status);
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- if (U_FAILURE(status)) {
- delete t;
- t = 0;
+ }
+ else {
+ UVector transliterators(status);
+ int32_t passNumber = 1;
+
+ int32_t limit = parser.idBlockVector.size();
+ if (parser.dataVector.size() > limit)
+ limit = parser.dataVector.size();
+
+ for (int32_t i = 0; i < limit; i++) {
+ if (i < parser.idBlockVector.size()) {
+ UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
+ if (!idBlock->isEmpty()) {
+ Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status);
+ if (temp != NULL && typeid(*temp) != typeid(NullTransliterator))
+ transliterators.addElement(temp, status);
+ else
+ delete temp;
+ }
}
- if (parser.compoundFilter != NULL) {
- t->adoptFilter(parser.orphanCompoundFilter());
+ if (!parser.dataVector.isEmpty()) {
+ TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
+ // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
+ RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
+ data, TRUE);
+ // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer.
+ if (temprbt == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return t;
+ }
+ transliterators.addElement(temprbt, status);
}
- return t;
}
- }
+ t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status);
+ // Null pointer check
+ if (t != NULL) {
+ t->setID(ID);
+ t->adoptFilter(parser.orphanCompoundFilter());
+ }
+ }
+ if (U_SUCCESS(status) && t == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
return t;
}
if (!ICU_Utility::escapeUnprintable(rulesSource, c)) {
rulesSource.append(c);
}
- i += UTF_CHAR_LENGTH(c);
+ i += U16_LENGTH(c);
}
} else {
rulesSource = getID();
}
int32_t Transliterator::countElements() const {
- return (this->getDynamicClassID() ==
- CompoundTransliterator::getStaticClassID()) ?
- ((const CompoundTransliterator*) this)->getCount() : 0;
+ const CompoundTransliterator* ct = dynamic_cast<const CompoundTransliterator*>(this); // NULL unless this object is (or derives from) CompoundTransliterator
+ return ct != NULL ? ct->getCount() : 0; // anything that is not a compound reports 0 elements
}
const Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const {
if (U_FAILURE(ec)) {
return *this;
}
- const CompoundTransliterator* cpd =
- (this->getDynamicClassID() == CompoundTransliterator::getStaticClassID()) ?
- (const CompoundTransliterator*) this : 0;
+ const CompoundTransliterator* cpd = dynamic_cast<const CompoundTransliterator*>(this);
int32_t n = (cpd == NULL) ? 1 : cpd->getCount();
if (index < 0 || index >= n) {
ec = U_INDEX_OUTOFBOUNDS_ERROR;
UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const {
handleGetSourceSet(result);
if (filter != NULL) {
- UnicodeSet* filterSet;
- UBool deleteFilterSet = FALSE;
- // Most, but not all filters will be UnicodeSets. Optimize for
- // the high-runner case.
- if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) {
- filterSet = (UnicodeSet*) filter;
- } else {
- filterSet = new UnicodeSet();
- deleteFilterSet = TRUE;
- filter->addMatchSetTo(*filterSet);
- }
- result.retainAll(*filterSet);
- if (deleteFilterSet) {
- delete filterSet;
- }
+ UnicodeSet* filterSet = dynamic_cast<UnicodeSet*>(filter); // non-NULL when the filter already is a UnicodeSet; it is then used directly
+ UBool deleteFilterSet = FALSE; // TRUE only when filterSet is a temporary allocated below
+ // Most, but not all filters will be UnicodeSets. Optimize for
+ // the high-runner case.
+ if (filterSet == NULL) {
+ filterSet = new UnicodeSet(); // temporary set, freed at the end of this branch
+ // Check null pointer
+ if (filterSet == NULL) {
+ return result; // allocation failed: result is returned without being intersected with the filter
+ }
+ deleteFilterSet = TRUE;
+ filter->addMatchSetTo(*filterSet); // fill the temporary set from the non-UnicodeSet filter
+ }
+ result.retainAll(*filterSet); // keep only the members of result that are also in filterSet
+ if (deleteFilterSet) {
+ delete filterSet;
+ }
}
return result;
}
void U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id,
Transliterator::Factory factory,
Transliterator::Token context) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // The function returns void, so whatever HAVE_REGISTRY stores in ec is
+ // not reported; when the registry is unavailable the call does nothing.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_registerFactory(id, factory, context);
}
}
void Transliterator::_registerFactory(const UnicodeString& id,
Transliterator::Factory factory,
Transliterator::Token context) {
- registry->put(id, factory, context, TRUE);
+ UErrorCode ec = U_ZERO_ERROR; // local status: handed to put() and not examined afterwards
+ registry->put(id, factory, context, TRUE, ec); // registry is dereferenced unchecked; registerFactory only calls this after HAVE_REGISTRY succeeds
}
// To be called only by Transliterator subclasses that are called
* @see #unregister
*/
void U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // The function returns void, so whatever HAVE_REGISTRY stores in ec is
+ // not reported to the caller.
+ // NOTE(review): when the registry is unavailable, adoptedPrototype is
+ // neither registered nor deleted here - confirm who owns it in that case.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_registerInstance(adoptedPrototype);
}
}
void Transliterator::_registerInstance(Transliterator* adoptedPrototype) {
- registry->put(adoptedPrototype, TRUE);
+ UErrorCode ec = U_ZERO_ERROR; // local status: handed to put() and not examined afterwards
+ registry->put(adoptedPrototype, TRUE, ec); // registry is dereferenced unchecked; registerInstance only calls this after HAVE_REGISTRY succeeds
+}
+
+void U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID,
+ const UnicodeString& realID) {
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex guard(registryMutex());
+ UErrorCode initStatus = U_ZERO_ERROR;
+ if (!HAVE_REGISTRY(initStatus)) {
+ // No registry: nothing is registered, and the void return type
+ // leaves no way to report initStatus.
+ return;
+ }
+ _registerAlias(aliasID, realID);
+}
+
+void Transliterator::_registerAlias(const UnicodeString& aliasID,
+ const UnicodeString& realID) {
+ UErrorCode ec = U_ZERO_ERROR; // local status: handed to put() and not examined afterwards
+ registry->put(aliasID, realID, FALSE, TRUE, ec); // NOTE(review): the FALSE/TRUE flags are defined by TransliteratorRegistry::put, which is not visible here - confirm their meaning there
}
/**
* Unregisters a transliterator or class. This may be either
* a system transliterator or a user transliterator or class.
- *
+ *
* @param ID the ID of the transliterator or class
* @see #registerInstance
*/
void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // The function returns void, so whatever HAVE_REGISTRY stores in ec is
+ // not reported; when the registry is unavailable nothing is removed.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
registry->remove(ID);
}
}
* i from 0 to countAvailableIDs() - 1.
*/
int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? registry->countAvailableIDs() : 0;
+ int32_t retVal = 0; // reported when the registry is unavailable
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // ec is local: an initialization failure shows up only as a count of 0.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
+ retVal = registry->countAvailableIDs();
+ }
+ return retVal;
}
/**
*/
const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) {
const UnicodeString* result = NULL;
- umtx_init(&registryMutex);
- umtx_lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ umtx_lock(registryMutex());
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
result = &registry->getAvailableID(index);
}
- umtx_unlock(&registryMutex);
+ umtx_unlock(registryMutex());
+ // NOTE(review): result stays NULL when the registry is unavailable. If
+ // U_ASSERT compiles to nothing in this build, the return below then
+ // dereferences NULL - confirm that this cannot happen in practice.
U_ASSERT(result != NULL); // fail if no registry
return *result;
}
StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) {
if (U_FAILURE(ec)) return NULL;
StringEnumeration* result = NULL;
- umtx_init(&registryMutex);
- umtx_lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ umtx_lock(registryMutex());
+ if (HAVE_REGISTRY(ec)) {
result = registry->getAvailableIDs();
}
- umtx_unlock(&registryMutex);
+ umtx_unlock(registryMutex());
if (result == NULL) {
ec = U_INTERNAL_TRANSLITERATOR_ERROR;
}
}
int32_t U_EXPORT2 Transliterator::countAvailableSources(void) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? _countAvailableSources() : 0;
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // ec is local: an initialization failure shows up only as a count of 0.
+ UErrorCode ec = U_ZERO_ERROR;
+ return HAVE_REGISTRY(ec) ? _countAvailableSources() : 0;
}
UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index,
UnicodeString& result) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // ec is local: when the registry is unavailable, result is handed back
+ // without this function having written to it.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_getAvailableSource(index, result);
}
return result;
}
int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? _countAvailableTargets(source) : 0;
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // ec is local: an initialization failure shows up only as a count of 0.
+ UErrorCode ec = U_ZERO_ERROR;
+ return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0;
}
UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index,
const UnicodeString& source,
UnicodeString& result) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ Mutex lock(registryMutex());
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_getAvailableTarget(index, source, result);
}
return result;
int32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source,
const UnicodeString& target) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? _countAvailableVariants(source, target) : 0;
+ // HAVE_REGISTRY may only be evaluated with the registry mutex held.
+ Mutex lock(registryMutex());
+ // ec is local: an initialization failure shows up only as a count of 0.
+ UErrorCode ec = U_ZERO_ERROR;
+ return HAVE_REGISTRY(ec) ? _countAvailableVariants(source, target) : 0;
}
UnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index,
const UnicodeString& source,
const UnicodeString& target,
UnicodeString& result) {
- umtx_init(&registryMutex);
- Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ Mutex lock(registryMutex());
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_getAvailableVariant(index, source, target, result);
}
return result;
* and return TRUE. If the registry cannot be initialized, return
* FALSE (rare).
*
- * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entirely
+ * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entire
* initialization is done with the lock held. There is NO REASON to
* unlock, since no other thread that is waiting on the registryMutex
- * cannot itself proceed until the registry is initialized.
+ * can itself proceed until the registry is initialized.
*/
-UBool Transliterator::initializeRegistry() {
+UBool Transliterator::initializeRegistry(UErrorCode &status) {
if (registry != 0) {
return TRUE;
}
- UErrorCode status = U_ZERO_ERROR;
-
registry = new TransliteratorRegistry(status);
if (registry == 0 || U_FAILURE(status)) {
delete registry;
* <id> is the ID of the system transliterator being defined. These
* are public IDs enumerated by Transliterator.getAvailableIDs(),
* unless the second field is "internal".
- *
+ *
* <resource> is a ResourceReader resource name. Currently these refer
* to file names under com/ibm/text/resources. This string is passed
* directly to ResourceReader, together with <encoding>.
- *
+ *
* <direction> is either "FORWARD" or "REVERSE".
- *
+ *
* <getInstanceArg> is a string to be passed directly to
* Transliterator.getInstance(). The returned Transliterator object
* then has its ID changed to <id> and is returned.
*/
//static const char translit_index[] = "translit_index";
- UResourceBundle *bundle, *transIDs, *colBund;
- bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open root bundle*/, &status);
- transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
-
- int32_t row, maxRows;
+ UResourceBundle *bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status);
+ UResourceBundle *transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
if (U_SUCCESS(status)) {
- maxRows = ures_getSize(transIDs);
+ UResourceBundle *colBund = NULL;
+ UResourceBundle* res = NULL;
+ int32_t row, maxRows = ures_getSize(transIDs);
for (row = 0; row < maxRows; row++) {
- colBund = ures_getByIndex(transIDs, row, 0, &status);
- if (U_SUCCESS(status)) {
- UnicodeString id(ures_getKey(colBund));
- UResourceBundle* res = ures_getNextResource(colBund, NULL, &status);
- const char* typeStr = ures_getKey(res);
- UChar type;
- u_charsToUChars(typeStr, &type, 1);
-
- if (U_SUCCESS(status)) {
- switch (type) {
- case 0x66: // 'f'
- case 0x69: // 'i'
- // 'file' or 'internal';
- // row[2]=resource, row[3]=direction
- {
-
- UnicodeString resString = ures_getUnicodeStringByKey(res, "resource", &status);
- UBool visible = (type == 0x0066 /*f*/);
- UTransDirection dir =
- (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) ==
- 0x0046 /*F*/) ?
- UTRANS_FORWARD : UTRANS_REVERSE;
- registry->put(id, resString, dir, visible);
- }
- break;
- case 0x61: // 'a'
- // 'alias'; row[2]=createInstance argument
- UnicodeString resString = ures_getUnicodeString(res, &status);
- registry->put(id, resString, TRUE);
- break;
+ colBund = ures_getByIndex(transIDs, row, colBund, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ const char *tridKey = ures_getKey(colBund);
+ if (tridKey == NULL || uprv_strstr(tridKey, "-t-") != NULL) {
+ continue; // Apple version should not get any of these, eliminated the root.txt entries
+ }
+ res = ures_getNextResource(colBund, res, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ UnicodeString trID(tridKey, -1, US_INV);
+ const char* typeStr = ures_getKey(res);
+ int32_t len = 0, dlen = 0;
+ UBool visible = FALSE;
+ const UChar *resString;
+ switch (typeStr[0]) {
+ case 'f': // "file"
+ visible = TRUE;
+ // FALLTHROUGH
+ case 'i': // "internal" => visible = FALSE
+ // child resources are resource and direction
+ {
+ resString = ures_getStringByKey(res, "resource", &len, &status);
+ const UChar* dirString = ures_getStringByKey(res, "direction", &dlen, &status);
+ UTransDirection dir = (dlen <= 0 || dirString[0] == 0x0046 /*F*/)? UTRANS_FORWARD : UTRANS_REVERSE;
+ registry->put(trID, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status);
}
- }
- ures_close(res);
+ break;
+ case 'a': // "alias", string argument is alias
+ resString = ures_getString(res, &len, &status);
+ registry->put(trID, UnicodeString(TRUE, resString, len), TRUE, TRUE, status);
+ break;
+ default: // do nothing
+ break;
}
- ures_close(colBund);
}
+ ures_close(res);
+ ures_close(colBund);
}
ures_close(transIDs);
// cache. This is how new non-rule-based transliterators are
// added to the system.
- registry->put(new NullTransliterator(), TRUE);
- registry->put(new LowercaseTransliterator(), TRUE);
- registry->put(new UppercaseTransliterator(), TRUE);
- registry->put(new TitlecaseTransliterator(), TRUE);
- registry->put(new UnicodeNameTransliterator(), TRUE);
- registry->put(new NameUnicodeTransliterator(), TRUE);
+ // This is to allow for null pointer check
+ NullTransliterator* tempNullTranslit = new NullTransliterator();
+ LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator();
+ UppercaseTransliterator* tempUppercaseTranslit = new UppercaseTransliterator();
+ TitlecaseTransliterator* tempTitlecaseTranslit = new TitlecaseTransliterator();
+ UnicodeNameTransliterator* tempUnicodeTranslit = new UnicodeNameTransliterator();
+ NameUnicodeTransliterator* tempNameUnicodeTranslit = new NameUnicodeTransliterator();
+#if !UCONFIG_NO_BREAK_ITERATION
+ // TODO: could or should these transliterators be referenced polymorphically once constructed?
+ BreakTransliterator* tempBreakTranslit = new BreakTransliterator();
+#endif
+ // Check for null pointers
+ if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL ||
+ tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL ||
+#if !UCONFIG_NO_BREAK_ITERATION
+ tempBreakTranslit == NULL ||
+#endif
+ tempNameUnicodeTranslit == NULL )
+ {
+ delete tempNullTranslit;
+ delete tempLowercaseTranslit;
+ delete tempUppercaseTranslit;
+ delete tempTitlecaseTranslit;
+ delete tempUnicodeTranslit;
+ delete tempNameUnicodeTranslit;
+#if !UCONFIG_NO_BREAK_ITERATION
+ delete tempBreakTranslit;
+#endif
+ // Since there was an error, remove registry
+ delete registry;
+ registry = NULL;
+
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+
+ registry->put(tempNullTranslit, TRUE, status);
+ registry->put(tempLowercaseTranslit, TRUE, status);
+ registry->put(tempUppercaseTranslit, TRUE, status);
+ registry->put(tempTitlecaseTranslit, TRUE, status);
+ registry->put(tempUnicodeTranslit, TRUE, status);
+ registry->put(tempNameUnicodeTranslit, TRUE, status);
+#if !UCONFIG_NO_BREAK_ITERATION
+ registry->put(tempBreakTranslit, FALSE, status); // FALSE means invisible.
+#endif
RemoveTransliterator::registerIDs(); // Must be within mutex
EscapeTransliterator::registerIDs();
NormalizationTransliterator::registerIDs();
AnyTransliterator::registerIDs();
- _registerSpecialInverse(NullTransliterator::SHORT_ID,
- NullTransliterator::SHORT_ID, FALSE);
+ _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"),
+ UNICODE_STRING_SIMPLE("Null"), FALSE);
_registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"),
UNICODE_STRING_SIMPLE("Lower"), TRUE);
_registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"),
UNICODE_STRING_SIMPLE("Lower"), FALSE);
- ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, transliterator_cleanup);
+ ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup);
return TRUE;
}
U_NAMESPACE_END
-// Defined in ucln_in.h:
+// Defined in transreg.h:
/**
* Release all static memory held by transliterator. This will
* necessarily invalidate any rule-based transliterators held by the
* user, because RBTs hold pointers to common data objects.
*/
-U_CFUNC UBool transliterator_cleanup(void) {
+U_CFUNC UBool utrans_transliterator_cleanup(void) {
+ // This function sits after U_NAMESPACE_END, so the ICU namespace is
+ // pulled in explicitly for the unqualified names below.
+ U_NAMESPACE_USE
TransliteratorIDParser::cleanup();
if (registry) {
delete registry;
registry = NULL;
}
- umtx_destroy(&registryMutex);
+ // No mutex teardown happens here any more: the registry mutex is now
+ // obtained through registryMutex() rather than a UMTX global.
return TRUE;
}