+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
-**********************************************************************
-* Copyright (C) 1999-2003, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 11/17/99 aliu Creation.
-**********************************************************************
-*/
+ **********************************************************************
+ * Copyright (C) 1999-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * Date Name Description
+ * 11/17/99 aliu Creation.
+ **********************************************************************
+ */
+
+#include "utypeinfo.h" // for 'typeid' to work
#include "unicode/utypes.h"
#include "unicode/rep.h"
#include "unicode/resbund.h"
#include "unicode/unifilt.h"
-#include "unicode/unifltlg.h"
#include "unicode/uniset.h"
#include "unicode/uscript.h"
+#include "unicode/strenum.h"
+#include "unicode/utf16.h"
#include "cpdtrans.h"
#include "nultrans.h"
#include "rbt_data.h"
#include "tolowtrn.h"
#include "toupptrn.h"
#include "uni2name.h"
+#include "brktrans.h"
#include "esctrn.h"
#include "unesctrn.h"
#include "tridpars.h"
#include "uassert.h"
#include "cmemory.h"
#include "cstring.h"
+#include "uinvchar.h"
static const UChar TARGET_SEP = 0x002D; /*-*/
static const UChar ID_DELIM = 0x003B; /*;*/
/**
* The mutex controlling access to registry object.
*/
-static UMTX registryMutex = 0;
+static UMutex registryMutex = U_MUTEX_INITIALIZER; // initialized statically via U_MUTEX_INITIALIZER
/**
* System transliterator registry; non-null when initialized.
*/
-static TransliteratorRegistry* registry = 0;
+static icu::TransliteratorRegistry* registry = 0; // namespace-qualified: this declaration precedes U_NAMESPACE_BEGIN
// Macro to check/initialize the registry. ONLY USE WITHIN
// MUTEX. Avoids function call when registry is initialized.
-#define HAVE_REGISTRY (registry!=0 || initializeRegistry())
-
-// Empty string
-static const UChar EMPTY[] = {0}; //""
+#define HAVE_REGISTRY(status) (registry!=0 || initializeRegistry(status))
U_NAMESPACE_BEGIN
-/**
- * Class identifier for subclasses of Transliterator that do not
- * define their class (anonymous subclasses).
- */
-const char Transliterator::fgClassID = 0; // Value is irrelevant
+UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator)
/**
* Return TRUE if the given UTransPosition is valid for text of
* the given length.
*/
-inline UBool positionIsValid(UTransPosition& index, int32_t len) {
+static inline UBool positionIsValid(UTransPosition& index, int32_t len) {
return !(index.contextStart < 0 ||
index.start < index.contextStart ||
index.limit < index.start ||
Transliterator::Transliterator(const UnicodeString& theID,
UnicodeFilter* adoptedFilter) :
UObject(), ID(theID), filter(adoptedFilter),
- maximumContextLength(0) {}
+ maximumContextLength(0)
+{
+ // NUL-terminate the ID string, which is a non-aliased copy.
+ ID.append((UChar)0); // temporarily grow the ID by one NUL code unit...
+ ID.truncate(ID.length()-1); // ...then shorten it again so the logical ID equals theID (presumably the NUL stays in the buffer just past the end)
+}
/**
* Destructor.
*/
Transliterator::~Transliterator() {
- delete filter;
+ if (filter) {
+ delete filter; // the filter was adopted (constructor) or cloned (copy constructor) by this object
+ }
}
/**
*/
Transliterator::Transliterator(const Transliterator& other) :
UObject(other), ID(other.ID), filter(0),
- maximumContextLength(other.maximumContextLength) {
+ maximumContextLength(other.maximumContextLength)
+{
+ // NUL-terminate the ID string, which is a non-aliased copy.
+ ID.append((UChar)0); // temporarily grow the ID by one NUL code unit...
+ ID.truncate(ID.length()-1); // ...then shorten it again so the logical ID equals other.ID
+
if (other.filter != 0) {
// We own the filter, so we must have our own copy
filter = (UnicodeFilter*) other.filter->clone();
}
}
+Transliterator* Transliterator::clone() const {
+ return NULL; // base-class implementation produces no copy; NOTE(review): concrete subclasses presumably override this - confirm
+}
+
/**
* Assignment operator.
*/
Transliterator& Transliterator::operator=(const Transliterator& other) {
ID = other.ID;
+ // NUL-terminate the ID string; the returned buffer pointer is deliberately ignored
+ ID.getTerminatedBuffer();
+
maximumContextLength = other.maximumContextLength;
adoptFilter((other.filter == 0) ? 0 : (UnicodeFilter*) other.filter->clone());
return *this;
}
if (index.limit > 0 &&
- UTF_IS_LEAD(text.charAt(index.limit - 1))) {
+ U16_IS_LEAD(text.charAt(index.limit - 1))) {
// Oops, there is a dangling lead surrogate in the buffer.
// This will break most transliterators, since they will
// assume it is part of a pair. Don't transliterate until
int32_t n = getMaximumContextLength();
while (newCS > originalStart && n-- > 0) {
--newCS;
- newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;
+ newCS -= U16_LENGTH(text.char32At(newCS)) - 1;
}
index.contextStart = uprv_max(newCS, originalStart);
#endif
UChar32 c;
while (index.start < globalLimit &&
!filter->contains(c=text.char32At(index.start))) {
- index.start += UTF_CHAR_LENGTH(c);
+ index.start += U16_LENGTH(c);
}
// Find the end of this run of unfiltered chars
index.limit = index.start;
while (index.limit < globalLimit &&
filter->contains(c=text.char32At(index.limit))) {
- index.limit += UTF_CHAR_LENGTH(c);
+ index.limit += U16_LENGTH(c);
}
}
// transliterations and commit complete transliterations.
for (;;) {
// Length of additional code point, either one or two
- int32_t charLength =
- UTF_CHAR_LENGTH(text.char32At(passLimit));
+ int32_t charLength = U16_LENGTH(text.char32At(passLimit));
passLimit += charLength;
if (passLimit > runLimit) {
break;
int32_t rs = rollbackStart + delta - (index.limit - passStart);
// Delete the partially transliterated text
- text.handleReplaceBetween(passStart, index.limit, EMPTY);
+ text.handleReplaceBetween(passStart, index.limit, UnicodeString());
// Copy the rollback text back
text.copy(rs, rs + uncommittedLength, passStart);
globalLimit += totalDelta;
// Delete the rollback copy
- text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY);
+ text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, UnicodeString());
// Move start past committed text
index.start = passStart;
* display to the user in the default locale. See {@link
* #getDisplayName(Locale)} for details.
*/
-UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
+UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& ID,
UnicodeString& result) {
return getDisplayName(ID, Locale::getDefault(), result);
}
* localized.
* @see java.text.MessageFormat
*/
-UnicodeString& Transliterator::getDisplayName(const UnicodeString& id,
+UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id,
const Locale& inLocale,
UnicodeString& result) {
UErrorCode status = U_ZERO_ERROR;
- ResourceBundle bundle(u_getDataDirectory(), inLocale, status);
+ ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);
// Suspend checking status until later...
ID.append(TARGET_SEP).append(target).append(variant);
// build the char* key
- char key[200];
- uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
- int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
- ID.extract(0, (int32_t)(sizeof(key)-length), key+length, "");
+ if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {
+ char key[200];
+ uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
+ int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
+ ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV);
- // Try to retrieve a UnicodeString from the bundle.
- UnicodeString resString = bundle.getStringEx(key, status);
+ // Try to retrieve a UnicodeString from the bundle.
+ UnicodeString resString = bundle.getStringEx(key, status);
- if (U_SUCCESS(status) && resString.length() != 0) {
- return result = resString; // [sic] assign & return
- }
+ if (U_SUCCESS(status) && resString.length() != 0) {
+ return result = resString; // [sic] assign & return
+ }
#if !UCONFIG_NO_FORMATTING
- // We have failed to get a name from the locale data. This is
- // typical, since most transliterators will not have localized
- // name data. The next step is to retrieve the MessageFormat
- // pattern from the locale data and to use it to synthesize the
- // name from the ID.
-
- status = U_ZERO_ERROR;
- resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);
-
- if (U_SUCCESS(status) && resString.length() != 0) {
- MessageFormat msg(resString, inLocale, status);
- // Suspend checking status until later...
-
- // We pass either 2 or 3 Formattable objects to msg.
- Formattable args[3];
- int32_t nargs;
- args[0].setLong(2); // # of args to follow
- args[1].setString(source);
- args[2].setString(target);
- nargs = 3;
-
- // Use display names for the scripts, if they exist
- UnicodeString s;
- length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
- for (int j=1; j<=2; ++j) {
- status = U_ZERO_ERROR;
- uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
- args[j].getString(s);
- s.extract(0, sizeof(key)-length-1, key+length, "");
+ // We have failed to get a name from the locale data. This is
+ // typical, since most transliterators will not have localized
+ // name data. The next step is to retrieve the MessageFormat
+ // pattern from the locale data and to use it to synthesize the
+ // name from the ID.
- resString = bundle.getStringEx(key, status);
+ status = U_ZERO_ERROR;
+ resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);
+
+ if (U_SUCCESS(status) && resString.length() != 0) {
+ MessageFormat msg(resString, inLocale, status);
+ // Suspend checking status until later...
+
+ // We pass either 2 or 3 Formattable objects to msg.
+ Formattable args[3];
+ int32_t nargs;
+ args[0].setLong(2); // # of args to follow
+ args[1].setString(source);
+ args[2].setString(target);
+ nargs = 3;
+
+ // Use display names for the scripts, if they exist
+ UnicodeString s;
+ length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
+ for (int j=1; j<=2; ++j) {
+ status = U_ZERO_ERROR;
+ uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
+ args[j].getString(s);
+ if (uprv_isInvariantUString(s.getBuffer(), s.length())) {
+ s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV);
+
+ resString = bundle.getStringEx(key, status);
+
+ if (U_SUCCESS(status)) {
+ args[j] = resString;
+ }
+ }
+ }
+ status = U_ZERO_ERROR;
+ FieldPosition pos; // ignored by msg
+ msg.format(args, nargs, result, pos, status);
if (U_SUCCESS(status)) {
- args[j] = resString;
+ result.append(variant);
+ return result;
}
}
-
- status = U_ZERO_ERROR;
- FieldPosition pos; // ignored by msg
- msg.format(args, nargs, result, pos, status);
- if (U_SUCCESS(status)) {
- result.append(variant);
- return result;
- }
- }
#endif
+ }
// We should not reach this point unless there is something
// wrong with the build or the RB_DISPLAY_NAME_PATTERN has
return Transliterator::createInstance(ID, UTRANS_REVERSE,parseError,status);
}
-Transliterator* Transliterator::createInstance(const UnicodeString& ID,
- UTransDirection dir,
- UErrorCode& status) {
+Transliterator* U_EXPORT2
+Transliterator::createInstance(const UnicodeString& ID,
+ UTransDirection dir,
+ UErrorCode& status)
+{ // convenience overload: parse-error details go into a local that is discarded; only status reaches the caller
UParseError parseError;
return createInstance(ID, dir, parseError, status);
}
* @see #getAvailableIDs
* @see #getID
*/
-Transliterator* Transliterator::createInstance(const UnicodeString& ID,
- UTransDirection dir,
- UParseError& parseError,
- UErrorCode& status) {
+Transliterator* U_EXPORT2
+Transliterator::createInstance(const UnicodeString& ID,
+ UTransDirection dir,
+ UParseError& parseError,
+ UErrorCode& status)
+{
if (U_FAILURE(status)) {
return 0;
}
return NULL;
}
- TransliteratorIDParser::instantiateList(list, NULL, -1, status);
+ TransliteratorIDParser::instantiateList(list, status);
if (U_FAILURE(status)) {
return NULL;
}
U_ASSERT(list.size() > 0);
Transliterator* t = NULL;
- switch (list.size()) {
- case 1:
- t = (Transliterator*) list.elementAt(0);
- break;
- default:
+
+ if (list.size() > 1 || canonID.indexOf(ID_DELIM) >= 0) {
+ // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only
+ // has one child transliterator. This is so that toRules() will return the right thing
+ // (without any inactive ID), but our main ID still comes out correct. That is, if we
+ // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;"
+ // even though the ID is "(Lower);Latin-Greek;".
t = new CompoundTransliterator(list, parseError, status);
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- if (U_FAILURE(status)) {
- delete t;
- return NULL;
+ }
+ else {
+ t = (Transliterator*)list.elementAt(0);
+ }
+ // Check null pointer
+ if (t != NULL) {
+ t->setID(canonID);
+ if (globalFilter != NULL) {
+ t->adoptFilter(globalFilter);
}
- break;
}
- t->setID(canonID);
- if (globalFilter != NULL) {
- t->adoptFilter(globalFilter);
+ else if (U_SUCCESS(status)) {
+ status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
}
UErrorCode ec = U_ZERO_ERROR;
TransliteratorAlias* alias = 0;
Transliterator* t = 0;
-
- umtx_init(&registryMutex);
+
umtx_lock(&registryMutex);
- if (HAVE_REGISTRY) {
- t = registry->get(id, alias, pe, ec);
+ if (HAVE_REGISTRY(ec)) {
+ t = registry->get(id, alias, ec);
}
umtx_unlock(&registryMutex);
if (U_FAILURE(ec)) {
delete t;
delete alias;
- return NULL;
+ return 0;
}
- if (alias != 0) {
- // Instantiate an alias
+ // We may have not gotten a transliterator: Because we can't
+ // instantiate a transliterator from inside TransliteratorRegistry::
+ // get() (that would deadlock), we sometimes pass back an alias. This
+ // contains the data we need to finish the instantiation outside the
+ // registry mutex. The alias may, in turn, generate another alias, so
+ // we handle aliases in a loop. The max times through the loop is two.
+ // [alan]
+ while (alias != 0) {
U_ASSERT(t==0);
- t = alias->create(pe, ec);
- delete alias;
+ // Rule-based aliases are handled with TransliteratorAlias::
+ // parse(), followed by TransliteratorRegistry::reget().
+ // Other aliases are handled with TransliteratorAlias::create().
+ if (alias->isRuleBased()) {
+ // Step 1. parse
+ TransliteratorParser parser(ec);
+ alias->parse(parser, pe, ec);
+ delete alias;
+ alias = 0;
+
+ // Step 2. reget
+ umtx_lock(&registryMutex);
+ if (HAVE_REGISTRY(ec)) {
+ t = registry->reget(id, parser, alias, ec);
+ }
+ umtx_unlock(&registryMutex);
+
+ // Step 3. Loop back around!
+ } else {
+ t = alias->create(pe, ec);
+ delete alias;
+ alias = 0;
+ break;
+ }
if (U_FAILURE(ec)) {
delete t;
+ delete alias;
t = NULL;
+ break;
}
}
* NullTransliterator, if it contains ID blocks which parse as
* empty for the given direction.
*/
-Transliterator* Transliterator::createFromRules(const UnicodeString& ID,
- const UnicodeString& rules,
- UTransDirection dir,
- UParseError& parseError,
- UErrorCode& status) {
+Transliterator* U_EXPORT2
+Transliterator::createFromRules(const UnicodeString& ID,
+ const UnicodeString& rules,
+ UTransDirection dir,
+ UParseError& parseError,
+ UErrorCode& status)
+{
Transliterator* t = NULL;
- TransliteratorParser parser;
+ TransliteratorParser parser(status);
parser.parse(rules, dir, parseError, status);
if (U_FAILURE(status)) {
}
// NOTE: The logic here matches that in TransliteratorRegistry.
- if (parser.idBlock.length() == 0) {
- if (parser.data == NULL) {
- // No idBlock, no data -- this is just an
- // alias for Null
- t = new NullTransliterator();
- } else {
- // No idBlock, data != 0 -- this is an
- // ordinary RBT_DATA.
- t = new RuleBasedTransliterator(ID, parser.orphanData(), TRUE); // TRUE == adopt data object
+ if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) {
+ t = new NullTransliterator();
+ }
+ else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) {
+ t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE);
+ }
+ else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) {
+ // idBlock, no data -- this is an alias. The ID has
+ // been munged from reverse into forward mode, if
+ // necessary, so instantiate the ID in the forward
+ // direction.
+ if (parser.compoundFilter != NULL) {
+ UnicodeString filterPattern;
+ parser.compoundFilter->toPattern(filterPattern, FALSE);
+ t = createInstance(filterPattern + UnicodeString(ID_DELIM)
+ + *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
}
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
+ else
+ t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
+
+
+ if (t != NULL) {
+ t->setID(ID);
}
- } else {
- if (parser.data == NULL) {
- // idBlock, no data -- this is an alias. The ID has
- // been munged from reverse into forward mode, if
- // necessary, so instantiate the ID in the forward
- // direction.
- t = createInstance(parser.idBlock, UTRANS_FORWARD, parseError, status);
- if (t != NULL) {
- t->setID(ID);
- }
- } else {
- // idBlock and data -- this is a compound
- // RBT
- UnicodeString id("_", "");
- t = new RuleBasedTransliterator(id, parser.orphanData(), TRUE); // TRUE == adopt data object
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- t = new CompoundTransliterator(ID, parser.idBlock, parser.idSplitPoint,
- t, status);
- /* test for NULL */
- if (t == 0) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- if (U_FAILURE(status)) {
- delete t;
- t = 0;
+ }
+ else {
+ UVector transliterators(status);
+ int32_t passNumber = 1;
+
+ int32_t limit = parser.idBlockVector.size();
+ if (parser.dataVector.size() > limit)
+ limit = parser.dataVector.size();
+
+ for (int32_t i = 0; i < limit; i++) {
+ if (i < parser.idBlockVector.size()) {
+ UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
+ if (!idBlock->isEmpty()) {
+ Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status);
+ if (temp != NULL && typeid(*temp) != typeid(NullTransliterator))
+ transliterators.addElement(temp, status);
+ else
+ delete temp;
+ }
}
- if (parser.compoundFilter != NULL) {
- t->adoptFilter(parser.orphanCompoundFilter());
+ if (!parser.dataVector.isEmpty()) {
+ TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
+ // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
+ RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
+ data, TRUE);
+ // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer.
+ if (temprbt == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return t;
+ }
+ transliterators.addElement(temprbt, status);
}
- return t;
}
- }
+ t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status);
+ // Null pointer check
+ if (t != NULL) {
+ t->setID(ID);
+ t->adoptFilter(parser.orphanCompoundFilter());
+ }
+ }
+ if (U_SUCCESS(status) && t == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
return t;
}
if (!ICU_Utility::escapeUnprintable(rulesSource, c)) {
rulesSource.append(c);
}
- i += UTF_CHAR_LENGTH(c);
+ i += U16_LENGTH(c);
}
} else {
rulesSource = getID();
return rulesSource;
}
+int32_t Transliterator::countElements() const {
+ const CompoundTransliterator* ct = dynamic_cast<const CompoundTransliterator*>(this); // NULL unless this object is a CompoundTransliterator
+ return ct != NULL ? ct->getCount() : 0; // a non-compound transliterator reports 0 elements
+}
+
+const Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const {
+ if (U_FAILURE(ec)) {
+ return *this; // incoming failure: ec is left untouched and this object is handed back
+ }
+ const CompoundTransliterator* cpd = dynamic_cast<const CompoundTransliterator*>(this);
+ int32_t n = (cpd == NULL) ? 1 : cpd->getCount(); // a non-compound transliterator is treated here as a single element (itself)
+ if (index < 0 || index >= n) {
+ ec = U_INDEX_OUTOFBOUNDS_ERROR;
+ return *this; // a reference must be returned even on error
+ } else {
+ return (n == 1) ? *this : cpd->getTransliterator(index); // n == 1 yields this object itself, also for a compound holding exactly one child
+ }
+}
+
UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const {
handleGetSourceSet(result);
if (filter != NULL) {
- UnicodeSet* filterSet;
- UBool deleteFilterSet = FALSE;
- // Most, but not all filters will be UnicodeSets. Optimize for
- // the high-runner case.
- if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) {
- filterSet = (UnicodeSet*) filter;
- } else {
- filterSet = new UnicodeSet();
- deleteFilterSet = TRUE;
- filter->addMatchSetTo(*filterSet);
- }
- result.retainAll(*filterSet);
- if (deleteFilterSet) {
- delete filterSet;
- }
+ UnicodeSet* filterSet = dynamic_cast<UnicodeSet*>(filter); // non-NULL when the filter itself is a UnicodeSet
+ UBool deleteFilterSet = FALSE;
+ // Most, but not all filters will be UnicodeSets. Optimize for
+ // the high-runner case.
+ if (filterSet == NULL) {
+ filterSet = new UnicodeSet(); // temporary set; released below via deleteFilterSet
+ // Check null pointer
+ if (filterSet == NULL) {
+ return result; // allocation failed: result is returned without the filter applied
+ }
+ deleteFilterSet = TRUE;
+ filter->addMatchSetTo(*filterSet);
+ }
+ result.retainAll(*filterSet);
+ if (deleteFilterSet) {
+ delete filterSet;
+ }
}
return result;
}
}
// For public consumption
-void Transliterator::registerFactory(const UnicodeString& id,
+void U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id,
Transliterator::Factory factory,
Transliterator::Token context) {
- umtx_init(&registryMutex);
Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // Local status only: if the registry cannot be initialized the
+ // registration is skipped and nothing is reported to the caller.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_registerFactory(id, factory, context);
}
}
void Transliterator::_registerFactory(const UnicodeString& id,
Transliterator::Factory factory,
Transliterator::Token context) {
- registry->put(id, factory, context, TRUE);
+ UErrorCode ec = U_ZERO_ERROR; // local status: a failure inside put() is not reported to the caller
+ registry->put(id, factory, context, TRUE, ec); // dereferences registry unchecked; the public registerFactory() calls this only after HAVE_REGISTRY succeeded
}
// To be called only by Transliterator subclasses that are called
void Transliterator::_registerSpecialInverse(const UnicodeString& target,
const UnicodeString& inverseTarget,
UBool bidirectional) {
- TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidirectional);
+ UErrorCode status = U_ZERO_ERROR; // local status: a failure is not reported to the caller
+ TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidirectional, status); // all arguments are forwarded unchanged
}
/**
* @see #getInstance
* @see #unregister
*/
-void Transliterator::registerInstance(Transliterator* adoptedPrototype) {
- umtx_init(&registryMutex);
+void U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) {
Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // Local status only: if the registry cannot be initialized the
+ // registration is skipped and nothing is reported to the caller.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_registerInstance(adoptedPrototype);
}
}
void Transliterator::_registerInstance(Transliterator* adoptedPrototype) {
- registry->put(adoptedPrototype, TRUE);
+ UErrorCode ec = U_ZERO_ERROR; // local status: a failure inside put() is not reported to the caller
+ registry->put(adoptedPrototype, TRUE, ec); // dereferences registry unchecked; the public registerInstance() calls this only after HAVE_REGISTRY succeeded
+}
+
+void U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID,
+ const UnicodeString& realID) {
+ Mutex lock(&registryMutex);
+ // Local status only: if the registry cannot be initialized the
+ // alias is not registered and nothing is reported to the caller.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
+ _registerAlias(aliasID, realID);
+ }
+}
+
+void Transliterator::_registerAlias(const UnicodeString& aliasID,
+ const UnicodeString& realID) {
+ UErrorCode ec = U_ZERO_ERROR; // local status: a failure inside put() is not reported to the caller
+ registry->put(aliasID, realID, FALSE, TRUE, ec); // dereferences registry unchecked; the public registerAlias() calls this only after HAVE_REGISTRY succeeded
}
/**
* Unregisters a transliterator or class. This may be either
* a system transliterator or a user transliterator or class.
- *
+ *
* @param ID the ID of the transliterator or class
* @see #registerInstance
*/
-void Transliterator::unregister(const UnicodeString& ID) {
- umtx_init(&registryMutex);
+void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) {
Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // Local status only: if the registry cannot be initialized there is
+ // nothing to remove and nothing is reported to the caller.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
registry->remove(ID);
}
}
/**
+ * == OBSOLETE - remove in ICU 3.4 ==
* Return the number of IDs currently registered with the system.
* To retrieve the actual IDs, call getAvailableID(i) with
* i from 0 to countAvailableIDs() - 1.
*/
-int32_t Transliterator::countAvailableIDs(void) {
- umtx_init(&registryMutex);
+int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) {
+ int32_t retVal = 0; // stays 0 when the registry cannot be initialized
Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? registry->countAvailableIDs() : 0;
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
+ retVal = registry->countAvailableIDs();
+ }
+ return retVal;
}
/**
+ * == OBSOLETE - remove in ICU 3.4 ==
* Return the index-th available ID. index must be between 0
* and countAvailableIDs() - 1, inclusive. If index is out of
* range, the result of getAvailableID(0) is returned.
*/
-const UnicodeString& Transliterator::getAvailableID(int32_t index) {
+const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) {
const UnicodeString* result = NULL;
- umtx_init(&registryMutex);
umtx_lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
result = &registry->getAvailableID(index);
}
umtx_unlock(&registryMutex);
+ // result stays NULL only when the registry could not be initialized;
+ // make that failure visible in debug builds instead of silently
+ // dereferencing a null pointer below.
+ U_ASSERT(result != NULL);
return *result;
}
-int32_t Transliterator::countAvailableSources(void) {
- umtx_init(&registryMutex);
+StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) {
+ if (U_FAILURE(ec)) return NULL;
+ StringEnumeration* result = NULL;
+ umtx_lock(&registryMutex);
+ if (HAVE_REGISTRY(ec)) {
+ result = registry->getAvailableIDs();
+ }
+ umtx_unlock(&registryMutex);
+ // No enumeration was obtained (registry unavailable, or the registry
+ // returned NULL): ec is set to an internal transliterator error.
+ if (result == NULL) {
+ ec = U_INTERNAL_TRANSLITERATOR_ERROR;
+ }
+ return result;
+}
+
+int32_t U_EXPORT2 Transliterator::countAvailableSources(void) {
Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? _countAvailableSources() : 0;
+ // Local status only: an unavailable registry is reported as 0 sources.
+ UErrorCode ec = U_ZERO_ERROR;
+ return HAVE_REGISTRY(ec) ? _countAvailableSources() : 0;
}
-UnicodeString& Transliterator::getAvailableSource(int32_t index,
+UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index,
UnicodeString& result) {
- umtx_init(&registryMutex);
Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // Local status only: if the registry cannot be initialized, result
+ // is returned exactly as the caller passed it in.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_getAvailableSource(index, result);
}
return result;
}
-int32_t Transliterator::countAvailableTargets(const UnicodeString& source) {
- umtx_init(&registryMutex);
+int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) {
Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? _countAvailableTargets(source) : 0;
+ // Local status only: an unavailable registry is reported as 0 targets.
+ UErrorCode ec = U_ZERO_ERROR;
+ return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0;
}
-UnicodeString& Transliterator::getAvailableTarget(int32_t index,
+UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index,
const UnicodeString& source,
UnicodeString& result) {
- umtx_init(&registryMutex);
Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ // Local status only: if the registry cannot be initialized, result
+ // is returned exactly as the caller passed it in.
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_getAvailableTarget(index, source, result);
}
return result;
}
-int32_t Transliterator::countAvailableVariants(const UnicodeString& source,
+int32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source,
const UnicodeString& target) {
- umtx_init(&registryMutex);
Mutex lock(&registryMutex);
- return HAVE_REGISTRY ? _countAvailableVariants(source, target) : 0;
+ // Local status only: an unavailable registry is reported as 0 variants.
+ UErrorCode ec = U_ZERO_ERROR;
+ return HAVE_REGISTRY(ec) ? _countAvailableVariants(source, target) : 0;
}
-UnicodeString& Transliterator::getAvailableVariant(int32_t index,
+UnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index,
const UnicodeString& source,
const UnicodeString& target,
UnicodeString& result) {
- umtx_init(&registryMutex);
Mutex lock(&registryMutex);
- if (HAVE_REGISTRY) {
+ UErrorCode ec = U_ZERO_ERROR;
+ if (HAVE_REGISTRY(ec)) {
_getAvailableVariant(index, source, target, result);
}
return result;
* and return TRUE. If the registry cannot be initialized, return
* FALSE (rare).
*
- * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entirely
+ * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entire
* initialization is done with the lock held. There is NO REASON to
* unlock, since any other thread that is waiting on the registryMutex
* cannot itself proceed until the registry is initialized.
*/
-UBool Transliterator::initializeRegistry() {
+UBool Transliterator::initializeRegistry(UErrorCode &status) {
if (registry != 0) {
return TRUE;
}
- UErrorCode status = U_ZERO_ERROR;
-
registry = new TransliteratorRegistry(status);
if (registry == 0 || U_FAILURE(status)) {
delete registry;
}
/* The following code parses the index table located in
- * icu/data/translit_index.txt. The index is an n x 4 table
+ * icu/data/translit/root.txt. The index is an n x 4 table
* that follows this format:
- *
- * <id>:file:<resource>:<direction>
- * <id>:internal:<resource>:<direction>
- * <id>:alias:<getInstanceArg>:
- *
+ * <id>{
+ * file{
+ * resource{"<resource>"}
+ * direction{"<direction>"}
+ * }
+ * }
+ * <id>{
+ * internal{
+ * resource{"<resource>"}
+ * direction{"<direction>"}
+ * }
+ * }
+ * <id>{
+ * alias{"<getInstanceArg>"}
+ * }
* <id> is the ID of the system transliterator being defined. These
* are public IDs enumerated by Transliterator.getAvailableIDs(),
* unless the second field is "internal".
- *
+ *
* <resource> is a ResourceReader resource name. Currently these refer
* to file names under com/ibm/text/resources. This string is passed
* directly to ResourceReader, together with <encoding>.
- *
+ *
* <direction> is either "FORWARD" or "REVERSE".
- *
+ *
* <getInstanceArg> is a string to be passed directly to
* Transliterator.getInstance(). The returned Transliterator object
* then has its ID changed to <id> and is returned.
*
* The extra blank field on "alias" lines is to make the array square.
*/
- static const char translit_index[] = "translit_index";
-
- UResourceBundle *bundle, *transIDs, *colBund;
- bundle = ures_openDirect(0, translit_index, &status);
- transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
+ //static const char translit_index[] = "translit_index";
- int32_t row, maxRows;
+ UResourceBundle *bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status);
+ UResourceBundle *transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
if (U_SUCCESS(status)) {
- maxRows = ures_getSize(transIDs);
+ UResourceBundle *colBund = NULL;
+ UResourceBundle* res = NULL;
+ int32_t row, maxRows = ures_getSize(transIDs);
for (row = 0; row < maxRows; row++) {
- colBund = ures_getByIndex(transIDs, row, 0, &status);
-
- if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
- UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
- UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
- UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
-
- if (U_SUCCESS(status)) {
- switch (type) {
- case 0x66: // 'f'
- case 0x69: // 'i'
- // 'file' or 'internal';
- // row[2]=resource, row[3]=direction
- {
- UBool visible = (type == 0x0066 /*f*/);
- UTransDirection dir =
- (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
- 0x0046 /*F*/) ?
- UTRANS_FORWARD : UTRANS_REVERSE;
- registry->put(id, resString, dir, visible);
- }
- break;
- case 0x61: // 'a'
- // 'alias'; row[2]=createInstance argument
- registry->put(id, resString, TRUE);
- break;
+ colBund = ures_getByIndex(transIDs, row, colBund, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ const char *tridKey = ures_getKey(colBund);
+ if (tridKey == NULL || uprv_strstr(tridKey, "-t-") != NULL) {
+ continue; // Apple version should not get any of these, eliminated the root.txt entries
+ }
+ res = ures_getNextResource(colBund, res, &status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+ UnicodeString trID(tridKey, -1, US_INV);
+ const char* typeStr = ures_getKey(res);
+ int32_t len = 0, dlen = 0;
+ UBool visible = FALSE;
+ const UChar *resString;
+ switch (typeStr[0]) {
+ case 'f': // "file"
+ visible = TRUE;
+ // FALLTHROUGH
+ case 'i': // "internal" => visible = FALSE
+ // child resources are resource and direction
+ {
+ resString = ures_getStringByKey(res, "resource", &len, &status);
+ const UChar* dirString = ures_getStringByKey(res, "direction", &dlen, &status);
+ UTransDirection dir = (dlen <= 0 || dirString[0] == 0x0046 /*F*/)? UTRANS_FORWARD : UTRANS_REVERSE;
+ registry->put(trID, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status);
}
- }
+ break;
+ case 'a': // "alias", string argument is alias
+ resString = ures_getString(res, &len, &status);
+ registry->put(trID, UnicodeString(TRUE, resString, len), TRUE, TRUE, status);
+ break;
+ default: // do nothing
+ break;
}
-
- ures_close(colBund);
}
+ ures_close(res);
+ ures_close(colBund);
}
ures_close(transIDs);
// cache. This is how new non-rule-based transliterators are
// added to the system.
- registry->put(new NullTransliterator(), TRUE);
- registry->put(new LowercaseTransliterator(), TRUE);
- registry->put(new UppercaseTransliterator(), TRUE);
- registry->put(new TitlecaseTransliterator(), TRUE);
- registry->put(new UnicodeNameTransliterator(), TRUE);
- registry->put(new NameUnicodeTransliterator(), TRUE);
+ // This is to allow for null pointer check
+ NullTransliterator* tempNullTranslit = new NullTransliterator();
+ LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator();
+ UppercaseTransliterator* tempUppercaseTranslit = new UppercaseTransliterator();
+ TitlecaseTransliterator* tempTitlecaseTranslit = new TitlecaseTransliterator();
+ UnicodeNameTransliterator* tempUnicodeTranslit = new UnicodeNameTransliterator();
+ NameUnicodeTransliterator* tempNameUnicodeTranslit = new NameUnicodeTransliterator();
+#if !UCONFIG_NO_BREAK_ITERATION
+ // TODO: could or should these transliterators be referenced polymorphically once constructed?
+ BreakTransliterator* tempBreakTranslit = new BreakTransliterator();
+#endif
+ // Check for null pointers
+ if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL ||
+ tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL ||
+#if !UCONFIG_NO_BREAK_ITERATION
+ tempBreakTranslit == NULL ||
+#endif
+ tempNameUnicodeTranslit == NULL )
+ {
+ delete tempNullTranslit;
+ delete tempLowercaseTranslit;
+ delete tempUppercaseTranslit;
+ delete tempTitlecaseTranslit;
+ delete tempUnicodeTranslit;
+ delete tempNameUnicodeTranslit;
+#if !UCONFIG_NO_BREAK_ITERATION
+ delete tempBreakTranslit;
+#endif
+ // Since there was an error, remove registry
+ delete registry;
+ registry = NULL;
+
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+
+ registry->put(tempNullTranslit, TRUE, status);
+ registry->put(tempLowercaseTranslit, TRUE, status);
+ registry->put(tempUppercaseTranslit, TRUE, status);
+ registry->put(tempTitlecaseTranslit, TRUE, status);
+ registry->put(tempUnicodeTranslit, TRUE, status);
+ registry->put(tempNameUnicodeTranslit, TRUE, status);
+#if !UCONFIG_NO_BREAK_ITERATION
+ registry->put(tempBreakTranslit, FALSE, status); // FALSE means invisible.
+#endif
RemoveTransliterator::registerIDs(); // Must be within mutex
EscapeTransliterator::registerIDs();
NormalizationTransliterator::registerIDs();
AnyTransliterator::registerIDs();
- _registerSpecialInverse(NullTransliterator::SHORT_ID,
- NullTransliterator::SHORT_ID, FALSE);
- _registerSpecialInverse("Upper", "Lower", TRUE);
- _registerSpecialInverse("Title", "Lower", FALSE);
+ _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"),
+ UNICODE_STRING_SIMPLE("Null"), FALSE);
+ _registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"),
+ UNICODE_STRING_SIMPLE("Lower"), TRUE);
+ _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"),
+ UNICODE_STRING_SIMPLE("Lower"), FALSE);
- ucln_i18n_registerCleanup();
+ ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup);
return TRUE;
}
U_NAMESPACE_END
-// Defined in ucln_in.h:
+// Defined in transreg.h:
/**
* Release all static memory held by transliterator. This will
* necessarily invalidate any rule-based transliterators held by the
* user, because RBTs hold pointers to common data objects.
*/
-U_CFUNC UBool transliterator_cleanup(void) {
- TitlecaseTransliterator::cleanup();
+U_CFUNC UBool utrans_transliterator_cleanup(void) {
+ // This function sits after U_NAMESPACE_END, so the ICU namespace is
+ // pulled in here for the unqualified class names used below.
+ U_NAMESPACE_USE
TransliteratorIDParser::cleanup();
if (registry) {
delete registry;
registry = NULL;
}
- umtx_destroy(&registryMutex);
return TRUE;
}