+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
-* Copyright (c) 2001-2004, International Business Machines
+* Copyright (c) 2001-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
#if !UCONFIG_NO_TRANSLITERATION
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
-#include "unicode/parseerr.h"
-#include "unicode/resbund.h"
#include "unicode/translit.h"
+#include "unicode/resbund.h"
#include "unicode/uniset.h"
#include "unicode/uscript.h"
#include "rbt.h"
#include "tridpars.h"
#include "charstr.h"
#include "uassert.h"
+#include "locutil.h"
// Enable the following symbol to add debugging code that tracks the
// allocation, deletion, and use of Entry objects. BoundsChecker has
// UChar constants
static const UChar LOCALE_SEP = 95; // '_'
-static const UChar ID_SEP = 0x002D; /*-*/
-static const UChar VARIANT_SEP = 0x002F; // '/'
+//static const UChar ID_SEP = 0x002D; /*-*/
+//static const UChar VARIANT_SEP = 0x002F; // '/'
// String constants
-static const UChar NO_VARIANT[] = { 0 }; // empty string
-static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
+static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
+static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat
+
+// empty string
+#define NO_VARIANT UnicodeString()
+
+// initial estimate for specDAG size
+// ICU 60 Transliterator::countAvailableSources()
+//#define SPECDAG_INIT_SIZE 149
+// Apple adjustment
+#define SPECDAG_INIT_SIZE 134
+
+// initial estimate for number of variant names
+#define VARIANT_LIST_INIT_SIZE 11
+#define VARIANT_LIST_MAX_SIZE 31
+
+// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
+// ICU 60 Transliterator::countAvailableIDs()
+//#define AVAILABLE_IDS_INIT_SIZE 641
+// Apple adjustment
+#define AVAILABLE_IDS_INIT_SIZE 493
+
+// initial estimate for number of targets for source "Any", "Lat"
+// ICU 60 Transliterator::countAvailableTargets("Any")/("Latn")
+//#define ANY_TARGETS_INIT_SIZE 125
+// Apple adjustment
+#define ANY_TARGETS_INIT_SIZE 102
+#define LAT_TARGETS_INIT_SIZE 23
/**
* Resource bundle key for the RuleBasedTransliterator rule.
*/
-static const char RB_RULE[] = "Rule";
+//static const char RB_RULE[] = "Rule";
U_NAMESPACE_BEGIN
// Alias
//------------------------------------------------------------------
-TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID) :
+TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
+ const UnicodeSet* cpdFilter) : // SIMPLE alias: stands in for the transliterator named by theAliasID
ID(),
- aliasID(theAliasID),
- trans(0),
- compoundFilter(0),
- idSplitPoint(-1),
+ aliasesOrRules(theAliasID), // for SIMPLE this is the ID that create() passes to Transliterator::createInstance()
+ transes(0), // a SIMPLE alias carries no adopted transliterators
+ compoundFilter(cpdFilter), // may be 0; create() clones it when non-null, and the destructor does not delete it
+ direction(UTRANS_FORWARD), // not read by create() for SIMPLE aliases; initialized to a defined value
type(TransliteratorAlias::SIMPLE) {
}
TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
- const UnicodeString& idBlock,
- Transliterator* adopted,
- int32_t theIDSplitPoint,
+ const UnicodeString& idBlocks, // ID blocks; create() splits this string on U+FFFF, which marks where an anonymous transliterator goes
+ UVector* adoptedTransliterators, // adopted: the vector is deleted by the destructor; create() orphans its elements
const UnicodeSet* cpdFilter) :
ID(theID),
- aliasID(idBlock),
- trans(adopted),
+ aliasesOrRules(idBlocks), // for COMPOUND this holds the U+FFFF-separated ID blocks
+ transes(adoptedTransliterators), // create() calls transes->size() without a null check, so this must be non-null for COMPOUND
compoundFilter(cpdFilter),
- idSplitPoint(theIDSplitPoint),
+ direction(UTRANS_FORWARD), // create() builds every ID block with UTRANS_FORWARD; this member is not read there
type(TransliteratorAlias::COMPOUND) {
}
const UnicodeString& rules,
UTransDirection dir) :
ID(theID),
- aliasID(rules), // bad name -- rename aliasID!
- trans(0),
+ aliasesOrRules(rules),
+ transes(0),
compoundFilter(0),
- idSplitPoint((int32_t) dir), // bad name -- rename idSplitPoint!
+ direction(dir),
type(TransliteratorAlias::RULES) {
}
TransliteratorAlias::~TransliteratorAlias() {
- delete trans;
+ // The vector and its contents were adopted by the COMPOUND constructor.
+ // create() orphans every element, so the vector is normally empty here;
+ // if create() was never called, the transliterators are still inside and
+ // must be released explicitly. orphanElementAt() is used so that a
+ // deleter installed on the vector (if any) is never invoked twice.
+ if (transes != 0) {
+ while (!transes->isEmpty()) {
+ delete (Transliterator*) transes->orphanElementAt(0);
+ }
+ }
+ delete transes;
}
Transliterator *t = NULL;
switch (type) {
case SIMPLE:
- t = Transliterator::createInstance(aliasID, UTRANS_FORWARD, pe, ec);
- break;
- case COMPOUND:
- t = new CompoundTransliterator(ID, aliasID, idSplitPoint,
- trans, ec);
- /* test for NULL */
- if (t == 0) {
- ec = U_MEMORY_ALLOCATION_ERROR;
+ t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
+ if(U_FAILURE(ec)){
return 0;
}
- trans = 0; // so we don't delete it later
- if (compoundFilter) {
- // TODO: Is this right? Are we leaking memory here?
- // I'm suspicious because of the "trans = 0" line above;
- // doesn't seem to fit the cloning here. Don't have time
- // to track this down right now. [alan 3.0]
- t->adoptFilter((UnicodeSet*) compoundFilter->clone());
+ if (compoundFilter != 0)
+ t->adoptFilter((UnicodeSet*)compoundFilter->clone());
+ break;
+ case COMPOUND:
+ {
+ // the total number of transliterators in the compound is the total number of anonymous transliterators
+ // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
+ // block and that each pair of anonymous transliterators has an ID block between them. Then we go back
+ // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
+ // marks the position where an anonymous transliterator goes) and adjust accordingly
+ int32_t anonymousRBTs = transes->size();
+ int32_t transCount = anonymousRBTs * 2 + 1;
+ if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
+ --transCount;
+ if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
+ --transCount;
+ UnicodeString noIDBlock((UChar)(0xffff));
+ noIDBlock += ((UChar)(0xffff));
+ int32_t pos = aliasesOrRules.indexOf(noIDBlock);
+ while (pos >= 0) {
+ --transCount;
+ pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
+ }
+
+ UVector transliterators(ec);
+ UnicodeString idBlock;
+ int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
+ while (blockSeparatorPos >= 0) {
+ aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
+ aliasesOrRules.remove(0, blockSeparatorPos + 1);
+ if (!idBlock.isEmpty())
+ transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
+ if (!transes->isEmpty())
+ transliterators.addElement(transes->orphanElementAt(0), ec);
+ blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
+ }
+ if (!aliasesOrRules.isEmpty())
+ transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
+ while (!transes->isEmpty())
+ transliterators.addElement(transes->orphanElementAt(0), ec);
+
+ if (U_SUCCESS(ec)) {
+ t = new CompoundTransliterator(ID, transliterators,
+ (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
+ anonymousRBTs, pe, ec);
+ if (t == 0) {
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ } else {
+ for (int32_t i = 0; i < transliterators.size(); i++)
+ delete (Transliterator*)(transliterators.elementAt(i));
+ }
}
break;
case RULES:
- U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
- break;
+ UPRV_UNREACHABLE; // don't call create() if isRuleBased() returns TRUE!
}
return t;
}
return;
}
- // aliasID is really rules -- rename it!
- // idSplitPoint is really UTransDirection -- rename it!
- parser.parse(aliasID, (UTransDirection) idSplitPoint, pe, ec);
+ parser.parse(aliasesOrRules, direction, pe, ec);
}
//----------------------------------------------------------------------
-// class Spec
+// class TransliteratorSpec
//----------------------------------------------------------------------
/**
- * A Spec is a string specifying either a source or a target. In more
+ * A TransliteratorSpec is a string specifying either a source or a target. In more
* general terms, it may also specify a variant, but we only use the
* Spec class for sources and targets.
*
* canonical form, or the script is transformed from an abbreviation
* to a full name.
*/
-class Spec : public UMemory {
+class TransliteratorSpec : public UMemory {
public:
- Spec(const UnicodeString& spec);
- ~Spec();
+ TransliteratorSpec(const UnicodeString& spec);
+ ~TransliteratorSpec();
const UnicodeString& get() const;
UBool hasFallback() const;
UBool isNextLocale; // TRUE if nextSpec is a locale
ResourceBundle* res;
- Spec(const Spec &other); // forbid copying of this class
- Spec &operator=(const Spec &other); // forbid copying of this class
+ TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
+ TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
};
-Spec::Spec(const UnicodeString& theSpec) : top(theSpec) {
+TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
+: top(theSpec),
+ res(0)
+{
UErrorCode status = U_ZERO_ERROR;
- CharString topch(top);
- Locale toploc(topch);
- res = new ResourceBundle(U_ICUDATA_TRANSLIT, toploc, status);
- /* test for NULL */
- if (res == 0) {
- return;
- }
- if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
- delete res;
- res = 0;
+ Locale topLoc("");
+ LocaleUtility::initLocaleFromName(theSpec, topLoc);
+ if (!topLoc.isBogus()) {
+ res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status);
+ /* test for NULL */
+ if (res == 0) {
+ return;
+ }
+ if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
+ delete res;
+ res = 0;
+ }
}
// Canonicalize script name -or- do locale->script mapping
status = U_ZERO_ERROR;
- const int32_t capacity = 10;
+ static const int32_t capacity = 10;
UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
- int32_t num = uscript_getCode(topch,script,capacity, &status);
+ int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(),
+ script, capacity, &status);
if (num > 0 && script[0] != USCRIPT_INVALID_CODE) {
- scriptName = UnicodeString(uscript_getName(script[0]), "");
+ scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV);
}
// Canonicalize top
- char buf[256];
if (res != 0) {
// Canonicalize locale name
- status = U_ZERO_ERROR;
- uloc_getName(topch, buf, sizeof(buf), &status);
- if (U_SUCCESS(status) && status != U_STRING_NOT_TERMINATED_WARNING) {
- top = UnicodeString(buf, "");
+ UnicodeString locStr;
+ LocaleUtility::initNameFromLocale(topLoc, locStr);
+ if (!locStr.isBogus()) {
+ top = locStr;
}
} else if (scriptName.length() != 0) {
// We are a script; use canonical name
reset();
}
-Spec::~Spec() {
+TransliteratorSpec::~TransliteratorSpec() { // releases the resource bundle opened by the constructor (res may be 0)
delete res;
}
-UBool Spec::hasFallback() const {
+UBool TransliteratorSpec::hasFallback() const { // TRUE when nextSpec is non-empty, i.e. next() has something to return
return nextSpec.length() != 0;
}
-void Spec::reset() {
+void TransliteratorSpec::reset() {
if (spec != top) {
spec = top;
isSpecLocale = (res != 0);
}
}
-void Spec::setupNext() {
+void TransliteratorSpec::setupNext() {
isNextLocale = FALSE;
if (isSpecLocale) {
nextSpec = spec;
// for(const UnicodeString& s(spec.get());
// spec.hasFallback(); s(spec.next())) { ...
-const UnicodeString& Spec::next() {
+const UnicodeString& TransliteratorSpec::next() { // makes nextSpec the current spec, recomputes the following fallback, returns the new current spec
spec = nextSpec;
isSpecLocale = isNextLocale;
setupNext();
return spec;
}
-const UnicodeString& Spec::get() const {
+const UnicodeString& TransliteratorSpec::get() const { // returns the current spec string without advancing
return spec;
}
-UBool Spec::isLocale() const {
+UBool TransliteratorSpec::isLocale() const { // reports the isSpecLocale flag for the current spec
return isSpecLocale;
}
-ResourceBundle& Spec::getBundle() const {
+ResourceBundle& TransliteratorSpec::getBundle() const { // dereferences res unchecked; res is 0 when no bundle was opened -- NOTE(review): confirm callers test isLocale() first
return *res;
}
// Caller must call DEBUG_setup first. Return index of given Entry,
// if it is in use (not deleted yet), or -1 if not found.
-static int DEBUG_findEntry(Entry* e) {
+static int DEBUG_findEntry(TransliteratorEntry* e) {
for (int i=0; i<DEBUG_entries->size(); ++i) {
- if (e == (Entry*) DEBUG_entries->elementAt(i)) {
+ if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
return i;
}
}
}
// Track object creation
-static void DEBUG_newEntry(Entry* e) {
+static void DEBUG_newEntry(TransliteratorEntry* e) {
DEBUG_setup();
if (DEBUG_findEntry(e) >= 0) {
// This should really never happen unless the heap is broken
}
// Track object deletion
-static void DEBUG_delEntry(Entry* e) {
+static void DEBUG_delEntry(TransliteratorEntry* e) {
DEBUG_setup();
int i = DEBUG_findEntry(e);
if (i < 0) {
}
// Track object usage
-static void DEBUG_useEntry(Entry* e) {
+static void DEBUG_useEntry(TransliteratorEntry* e) {
if (e == NULL) return;
DEBUG_setup();
int i = DEBUG_findEntry(e);
* for it. We could easily add this if there is a need for it in the
* future.
*/
-class Entry : public UMemory {
+class TransliteratorEntry : public UMemory {
public:
enum Type {
RULES_FORWARD,
UnicodeSet* compoundFilter; // For COMPOUND_RBT
union {
Transliterator* prototype; // For PROTOTYPE
- TransliterationRuleData* data; // For RBT_DATA, COMPOUND_RBT
+ TransliterationRuleData* data; // For RBT_DATA
+ UVector* dataVector; // For COMPOUND_RBT
struct {
Transliterator::Factory function;
Transliterator::Token context;
} factory; // For FACTORY
} u;
- Entry();
- ~Entry();
+ TransliteratorEntry();
+ ~TransliteratorEntry();
void adoptPrototype(Transliterator* adopted);
void setFactory(Transliterator::Factory factory,
Transliterator::Token context);
private:
- Entry(const Entry &other); // forbid copying of this class
- Entry &operator=(const Entry &other); // forbid copying of this class
+ TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
+ TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
};
-Entry::Entry() {
+TransliteratorEntry::TransliteratorEntry() { // creates an empty entry: type NONE, null union pointer, no compound filter
u.prototype = 0;
compoundFilter = NULL;
entryType = NONE;
DEBUG_newEntry(this);
}
-Entry::~Entry() {
+TransliteratorEntry::~TransliteratorEntry() { // frees the union member selected by entryType, then the compound filter
DEBUG_delEntry(this);
if (entryType == PROTOTYPE) {
delete u.prototype;
- } else if (entryType == RBT_DATA || entryType == COMPOUND_RBT) {
+ } else if (entryType == RBT_DATA) {
// The data object is shared between instances of RBT. The
// entry object owns it. It should only be deleted when the
// transliterator component is being cleaned up. Doing so
// invalidates any RBTs that the user has instantiated.
delete u.data;
+ } else if (entryType == COMPOUND_RBT) { // COMPOUND_RBT keeps its rule data in u.dataVector rather than u.data
+ while (u.dataVector != NULL && !u.dataVector->isEmpty()) // drain the vector, deleting each element as a TransliterationRuleData
+ delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
+ delete u.dataVector; // safe when u.dataVector is NULL
}
delete compoundFilter;
}
-void Entry::adoptPrototype(Transliterator* adopted) {
+void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
if (entryType == PROTOTYPE) {
delete u.prototype;
}
u.prototype = adopted;
}
-void Entry::setFactory(Transliterator::Factory factory,
+void TransliteratorEntry::setFactory(Transliterator::Factory factory,
Transliterator::Token context) {
if (entryType == PROTOTYPE) {
delete u.prototype;
// UObjectDeleter for Hashtable::setValueDeleter
U_CDECL_BEGIN
-static void U_EXPORT2 U_CALLCONV
+static void U_CALLCONV
deleteEntry(void* obj) {
- delete (Entry*) obj;
+ delete (TransliteratorEntry*) obj; // obj is cast back to TransliteratorEntry so its destructor runs
}
U_CDECL_END
TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
registry(TRUE, status),
- specDAG(TRUE, status),
- availableIDs(status)
+ specDAG(TRUE, SPECDAG_INIT_SIZE, status), // pre-sized to avoid rehashing while the built-in sources are registered
+ variantList(VARIANT_LIST_INIT_SIZE, status),
+ availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
{
registry.setValueDeleter(deleteEntry);
- availableIDs.setDeleter(uhash_deleteUnicodeString);
+ variantList.setDeleter(uprv_deleteUObject);
+ variantList.setComparer(uhash_compareCaselessUnicodeString);
+ // Seed the variant list with the empty ("no variant") string.
+ UnicodeString *emptyString = new UnicodeString();
+ if (emptyString != NULL) {
+ variantList.addElement(emptyString, status);
+ } else if (U_SUCCESS(status)) {
+ // Do not swallow the allocation failure; keep any earlier error intact.
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ availableIDs.setDeleter(uprv_deleteUObject);
availableIDs.setComparer(uhash_compareCaselessUnicodeString);
specDAG.setValueDeleter(uhash_deleteHashtable);
}
TransliteratorAlias*& aliasReturn,
UErrorCode& status) {
U_ASSERT(aliasReturn == NULL);
- Entry *entry = find(ID);
+ TransliteratorEntry *entry = find(ID);
return (entry == 0) ? 0
: instantiateEntry(ID, entry, aliasReturn, status);
}
TransliteratorAlias*& aliasReturn,
UErrorCode& status) {
U_ASSERT(aliasReturn == NULL);
- Entry *entry = find(ID);
+ TransliteratorEntry *entry = find(ID);
if (entry == 0) {
// We get to this point if there are two threads, one of which
// We have to detect this so we don't stomp over existing entry
// data members and potentially leak memory (u.data and compoundFilter).
- if (entry->entryType == Entry::RULES_FORWARD ||
- entry->entryType == Entry::RULES_REVERSE ||
- entry->entryType == Entry::LOCALE_RULES) {
+ if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
+ entry->entryType == TransliteratorEntry::RULES_REVERSE ||
+ entry->entryType == TransliteratorEntry::LOCALE_RULES) {
- entry->u.data = parser.orphanData();
- entry->stringArg = parser.idBlock;
- entry->intArg = parser.idSplitPoint;
- entry->compoundFilter = parser.orphanCompoundFilter();
-
- // Reset entry->entryType to encapsulate the parsed data. The
- // next time we instantiate this ID (including this very next
- // time, at the end of this function) we won't have to parse
- // again.
- // NOTE: The logic here matches that in
- // Transliterator::createFromRules().
- if (entry->stringArg.length() == 0) {
- if (entry->u.data == 0) {
- // No idBlock, no data -- this is just an
- // alias for Null
- entry->entryType = Entry::ALIAS;
- entry->stringArg = NullTransliterator::ID;
- } else {
- // No idBlock, data != 0 -- this is an
- // ordinary RBT_DATA
- entry->entryType = Entry::RBT_DATA;
- }
- } else {
- if (entry->u.data == 0) {
- // idBlock, no data -- this is an alias. The ID has
- // been munged from reverse into forward mode, if
- // necessary, so instantiate the ID in the forward
- // direction.
- entry->entryType = Entry::ALIAS;
- } else {
- // idBlock and data -- this is a compound
- // RBT
- entry->entryType = Entry::COMPOUND_RBT;
+ if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
+ entry->u.data = 0;
+ entry->entryType = TransliteratorEntry::ALIAS;
+ entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
+ }
+ else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
+ entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
+ entry->entryType = TransliteratorEntry::RBT_DATA;
+ }
+ else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
+ entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
+ entry->compoundFilter = parser.orphanCompoundFilter();
+ entry->entryType = TransliteratorEntry::ALIAS;
+ }
+ else {
+ entry->entryType = TransliteratorEntry::COMPOUND_RBT;
+ entry->compoundFilter = parser.orphanCompoundFilter();
+ entry->u.dataVector = new UVector(status);
+ entry->stringArg.remove();
+
+ int32_t limit = parser.idBlockVector.size();
+ if (parser.dataVector.size() > limit)
+ limit = parser.dataVector.size();
+
+ for (int32_t i = 0; i < limit; i++) {
+ if (i < parser.idBlockVector.size()) {
+ UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
+ if (!idBlock->isEmpty())
+ entry->stringArg += *idBlock;
+ }
+ if (!parser.dataVector.isEmpty()) {
+ TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
+ entry->u.dataVector->addElement(data, status);
+ entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block
+ }
}
}
}
}
void TransliteratorRegistry::put(Transliterator* adoptedProto,
- UBool visible) {
- Entry *entry = new Entry();
+ UBool visible,
+ UErrorCode& ec)
+{
+ // Registers an adopted prototype under its own ID. On allocation failure
+ // ec is set to U_MEMORY_ALLOCATION_ERROR and nothing is registered.
+ TransliteratorEntry *entry = new TransliteratorEntry();
+ if (entry == NULL) {
+ // Ownership of adoptedProto was transferred to this call, so it must
+ // be released here; otherwise it leaks on the error path.
+ delete adoptedProto;
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
entry->adoptPrototype(adoptedProto);
registerEntry(adoptedProto->getID(), entry, visible);
}
void TransliteratorRegistry::put(const UnicodeString& ID,
Transliterator::Factory factory,
Transliterator::Token context,
- UBool visible) {
- Entry *entry = new Entry();
+ UBool visible,
+ UErrorCode& ec) { // registers a factory-backed entry under ID; ec is written only on allocation failure
+ TransliteratorEntry *entry = new TransliteratorEntry(); // handed to registerEntry() below, whose parameter is named "adopted"
+ if (entry == NULL) {
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ return; // nothing was registered
+ }
entry->setFactory(factory, context);
registerEntry(ID, entry, visible);
}
void TransliteratorRegistry::put(const UnicodeString& ID,
const UnicodeString& resourceName,
UTransDirection dir,
- UBool visible) {
- Entry *entry = new Entry();
- entry->entryType = (dir == UTRANS_FORWARD) ? Entry::RULES_FORWARD
- : Entry::RULES_REVERSE;
- entry->stringArg = resourceName;
+ UBool readonlyResourceAlias, // TRUE: alias resourceName's buffer instead of copying it
+ UBool visible,
+ UErrorCode& ec) { // ec is written only when the entry cannot be allocated
+ TransliteratorEntry *entry = new TransliteratorEntry();
+ if (entry == NULL) {
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD // rules are stored unparsed; the type records the direction
+ : TransliteratorEntry::RULES_REVERSE;
+ if (readonlyResourceAlias) {
+ entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1); // read-only alias with length -1: NOTE(review) requires a NUL-terminated buffer that outlives the entry -- confirm with callers
+ }
+ else {
+ entry->stringArg = resourceName; // independent copy of the resource name
+ }
registerEntry(ID, entry, visible);
}
void TransliteratorRegistry::put(const UnicodeString& ID,
const UnicodeString& alias,
- UBool visible) {
- Entry *entry = new Entry();
- entry->entryType = Entry::ALIAS;
- entry->stringArg = alias;
- registerEntry(ID, entry, visible);
+ UBool readonlyAliasAlias, // TRUE: alias the caller's buffer instead of copying it
+ UBool visible,
+ UErrorCode& ec) {
+ // Registers ID as an alias for another transliterator ID.
+ TransliteratorEntry *entry = new TransliteratorEntry();
+ if (entry == NULL) {
+ // Report allocation failure the same way the other put() overloads
+ // do, instead of silently registering nothing.
+ ec = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ entry->entryType = TransliteratorEntry::ALIAS;
+ if (readonlyAliasAlias) {
+ // Read-only alias with length -1: the buffer must be NUL-terminated.
+ entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);
+ }
+ else {
+ entry->stringArg = alias;
+ }
+ registerEntry(ID, entry, visible);
}
void TransliteratorRegistry::remove(const UnicodeString& ID) {
UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
UnicodeString& result) const {
- int32_t pos = -1;
+ int32_t pos = UHASH_FIRST;
const UHashElement *e = 0;
while (index-- >= 0) {
e = specDAG.nextElement(pos);
result.truncate(0); // invalid source
return result;
}
- int32_t pos = -1;
+ int32_t pos = UHASH_FIRST;
const UHashElement *e = 0;
while (index-- >= 0) {
e = targets->nextElement(pos);
if (targets == 0) {
return 0;
}
- UVector *variants = (UVector*) targets->get(target);
- // variants may be 0 if the source/target are invalid
- return (variants == 0) ? 0 : variants->size();
+ uint32_t varMask = targets->geti(target);
+ int32_t varCount = 0;
+ while (varMask > 0) {
+ if (varMask & 1) {
+ varCount++;
+ }
+ varMask >>= 1;
+ }
+ return varCount;
}
UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
result.truncate(0); // invalid source
return result;
}
- UVector *variants = (UVector*) targets->get(target);
- if (variants == 0) {
- result.truncate(0); // invalid target
- return result;
- }
- UnicodeString *v = (UnicodeString*) variants->elementAt(index);
- if (v == 0) {
- result.truncate(0); // invalid index
- } else {
- result = *v;
+ uint32_t varMask = targets->geti(target);
+ int32_t varCount = 0;
+ int32_t varListIndex = 0;
+ while (varMask > 0) {
+ if (varMask & 1) {
+ if (varCount == index) {
+ UnicodeString *v = (UnicodeString*) variantList.elementAt(varListIndex);
+ if (v != NULL) {
+ result = *v;
+ return result;
+ }
+ break;
+ }
+ varCount++;
+ }
+ varMask >>= 1;
+ varListIndex++;
}
+ result.truncate(0); // invalid target or index
return result;
}
void TransliteratorRegistry::registerEntry(const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant,
- Entry* adopted,
+ TransliteratorEntry* adopted,
UBool visible) {
UnicodeString ID;
UnicodeString s(source);
if (s.length() == 0) {
- s = ANY;
+ s.setTo(TRUE, ANY, 3);
}
TransliteratorIDParser::STVtoID(source, target, variant, ID);
registerEntry(ID, s, target, variant, adopted, visible);
* Convenience method. Calls 6-arg registerEntry().
*/
void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
- Entry* adopted,
+ TransliteratorEntry* adopted,
UBool visible) {
UnicodeString source, target, variant;
UBool sawSource;
const UnicodeString& source,
const UnicodeString& target,
const UnicodeString& variant,
- Entry* adopted,
+ TransliteratorEntry* adopted,
UBool visible) {
UErrorCode status = U_ZERO_ERROR;
registry.put(ID, adopted, status);
registerSTV(source, target, variant);
if (!availableIDs.contains((void*) &ID)) {
UnicodeString *newID = (UnicodeString *)ID.clone();
- // NUL-terminate the ID string
- newID->getTerminatedBuffer();
- availableIDs.addElement(newID, status);
+ // Check to make sure newID was created.
+ if (newID != NULL) {
+ // NUL-terminate the ID string
+ newID->getTerminatedBuffer();
+ availableIDs.addElement(newID, status);
+ }
}
} else {
removeSTV(source, target, variant);
/**
* Register a source-target/variant in the specDAG. Variant may be
- * empty, but source and target must not be. If variant is empty then
- * the special variant NO_VARIANT is stored in slot zero of the
- * UVector of variants.
+ * empty, but source and target must not be.
*/
void TransliteratorRegistry::registerSTV(const UnicodeString& source,
const UnicodeString& target,
UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) specDAG.get(source);
if (targets == 0) {
- targets = new Hashtable(TRUE, status);
- if (U_FAILURE(status) || targets == 0) {
+ int32_t size = 3;
+ if (source.compare(ANY,3) == 0) {
+ size = ANY_TARGETS_INIT_SIZE;
+ } else if (source.compare(LAT,3) == 0) {
+ size = LAT_TARGETS_INIT_SIZE;
+ }
+ targets = new Hashtable(TRUE, size, status);
+ if (U_FAILURE(status) || targets == NULL) {
return;
}
- targets->setValueDeleter(uhash_deleteUVector);
specDAG.put(source, targets, status);
}
- UVector *variants = (UVector*) targets->get(target);
- if (variants == 0) {
- variants = new UVector(uhash_deleteUnicodeString,
- uhash_compareCaselessUnicodeString, status);
- if (variants == 0) {
+ int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+ if (variantListIndex < 0) {
+ if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
+ // can't handle any more variants
return;
}
- targets->put(target, variants, status);
- }
- // assert(NO_VARIANT == "");
- // We add the variant string. If it is the special "no variant"
- // string, that is, the empty string, we add it at position zero.
- if (!variants->contains((void*) &variant)) {
- if (variant.length() > 0) {
- variants->addElement(new UnicodeString(variant), status);
- } else {
- variants->insertElementAt(new UnicodeString(NO_VARIANT), 0, status);
+ UnicodeString *variantEntry = new UnicodeString(variant);
+ if (variantEntry != NULL) {
+ variantList.addElement(variantEntry, status);
+ if (U_SUCCESS(status)) {
+ variantListIndex = variantList.size() - 1;
+ }
+ }
+ if (variantListIndex < 0) {
+ return;
}
}
+ uint32_t addMask = 1 << variantListIndex;
+ uint32_t varMask = targets->geti(target);
+ targets->puti(target, varMask | addMask, status);
}
/**
const UnicodeString& variant) {
// assert(source.length() > 0);
// assert(target.length() > 0);
-// UErrorCode status = U_ZERO_ERROR;
+ UErrorCode status = U_ZERO_ERROR;
Hashtable *targets = (Hashtable*) specDAG.get(source);
- if (targets == 0) {
+ if (targets == NULL) {
+ return; // should never happen for valid s-t/v
+ }
+ uint32_t varMask = targets->geti(target);
+ if (varMask == 0) {
return; // should never happen for valid s-t/v
}
- UVector *variants = (UVector*) targets->get(target);
- if (variants == 0) {
+ int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
+ if (variantListIndex < 0) {
return; // should never happen for valid s-t/v
}
- variants->removeElement((void*) &variant);
- if (variants->size() == 0) {
+ int32_t remMask = 1 << variantListIndex;
+ varMask &= (~remMask);
+ if (varMask != 0) {
+ targets->puti(target, varMask, status);
+ } else {
targets->remove(target); // should delete variants
if (targets->count() == 0) {
specDAG.remove(source); // should delete targets
*
* Caller does NOT own returned object.
*/
-Entry* TransliteratorRegistry::findInDynamicStore(const Spec& src,
- const Spec& trg,
+TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
+ const TransliteratorSpec& trg, // src, trg and variant are combined into one ID by STVtoID() below
const UnicodeString& variant) const {
UnicodeString ID;
TransliteratorIDParser::STVtoID(src, trg, variant, ID);
- Entry *e = (Entry*) registry.get(ID);
+ TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID); // hashtable lookup by ID; the result is returned as-is and may be 0
DEBUG_useEntry(e);
return e;
}
*
* Caller does NOT own returned object.
*/
-Entry* TransliteratorRegistry::findInStaticStore(const Spec& src,
- const Spec& trg,
+TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
+ const TransliteratorSpec& trg,
const UnicodeString& variant) {
- Entry* entry = 0;
+ TransliteratorEntry* entry = 0;
if (src.isLocale()) {
entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
} else if (trg.isLocale()) {
* On success, create a new Entry object, populate it, and return it.
* The caller owns the returned object.
*/
-Entry* TransliteratorRegistry::findInBundle(const Spec& specToOpen,
- const Spec& specToFind,
+TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
+ const TransliteratorSpec& specToFind,
const UnicodeString& variant,
UTransDirection direction)
{
// but must be consistent and documented.
if (pass == 0) {
utag.append(direction == UTRANS_FORWARD ?
- TRANSLITERATE_TO : TRANSLITERATE_FROM);
+ TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
} else {
- utag.append(TRANSLITERATE);
+ utag.append(TRANSLITERATE, -1);
}
UnicodeString s(specToFind.get());
- utag.append(s.toUpper());
- CharString tag(utag);
-
+ utag.append(s.toUpper(""));
UErrorCode status = U_ZERO_ERROR;
- ResourceBundle subres(specToOpen.getBundle().get(tag, status));
+ ResourceBundle subres(specToOpen.getBundle().get(
+ CharString().appendInvariantChars(utag, status).data(), status));
if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
continue;
}
-
- if (specToOpen.get() != subres.getLocale().getName()) {
+
+ s.truncate(0);
+ if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
continue;
}
-
+
if (variant.length() != 0) {
- CharString var(variant);
status = U_ZERO_ERROR;
- resStr = subres.getStringEx(var, status);
+ resStr = subres.getStringEx(
+ CharString().appendInvariantChars(variant, status).data(), status);
if (U_SUCCESS(status)) {
// Exit loop successfully
break;
}
- }
-
- else {
+ } else {
// Variant is empty, which means match the first variant listed.
status = U_ZERO_ERROR;
resStr = subres.getStringEx(1, status);
// We have succeeded in loading a string from the locale
// resources. Create a new registry entry to hold it and return it.
- Entry *entry = new Entry();
+ TransliteratorEntry *entry = new TransliteratorEntry();
if (entry != 0) {
// The direction is always forward for the
// TransliterateTo_xxx and TransliterateFrom_xxx
// the direction is the value passed in to this
// function.
int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
- entry->entryType = Entry::LOCALE_RULES;
+ entry->entryType = TransliteratorEntry::LOCALE_RULES;
entry->stringArg = resStr;
entry->intArg = dir;
}
/**
* Convenience method. Calls 3-arg find().
*/
-Entry* TransliteratorRegistry::find(const UnicodeString& ID) {
+TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
UnicodeString source, target, variant;
UBool sawSource;
TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
*
* Caller does NOT own returned object. Return 0 on failure.
*/
-Entry* TransliteratorRegistry::find(UnicodeString& source,
+TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
UnicodeString& target,
UnicodeString& variant) {
- Spec src(source);
- Spec trg(target);
- Entry* entry;
+ TransliteratorSpec src(source);
+ TransliteratorSpec trg(target);
+ TransliteratorEntry* entry;
+
+ // Seek exact match in hashtable. Temporary fix for ICU 4.6.
+ // TODO: The general logic for finding a matching transliterator needs to be reviewed.
+ // ICU ticket #8089
+ UnicodeString ID;
+ TransliteratorIDParser::STVtoID(source, target, variant, ID);
+ entry = (TransliteratorEntry*) registry.get(ID);
+ if (entry != 0) {
+ // std::string ss;
+ // std::cout << ID.toUTF8String(ss) << std::endl;
+ return entry;
+ }
if (variant.length() != 0) {
* modified.
*/
Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
- Entry *entry,
+ TransliteratorEntry *entry,
TransliteratorAlias* &aliasReturn,
UErrorCode& status) {
Transliterator *t = 0;
U_ASSERT(aliasReturn == 0);
switch (entry->entryType) {
- case Entry::RBT_DATA:
+ case TransliteratorEntry::RBT_DATA:
t = new RuleBasedTransliterator(ID, entry->u.data);
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
- case Entry::PROTOTYPE:
+ case TransliteratorEntry::PROTOTYPE:
t = entry->u.prototype->clone();
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
- case Entry::ALIAS:
- aliasReturn = new TransliteratorAlias(entry->stringArg);
+ case TransliteratorEntry::ALIAS:
+ aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
- case Entry::FACTORY:
+ case TransliteratorEntry::FACTORY:
t = entry->u.factory.function(ID, entry->u.factory.context);
if (t == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return t;
- case Entry::COMPOUND_RBT:
+ case TransliteratorEntry::COMPOUND_RBT:
{
- UnicodeString id((UChar)0x005F); /* "_" */
- Transliterator *t = new RuleBasedTransliterator(id, entry->u.data);
- if (t == 0) {
+ UVector* rbts = new UVector(entry->u.dataVector->size(), status);
+ // Check for null pointer
+ if (rbts == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
+ return NULL;
+ }
+ int32_t passNumber = 1;
+ for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
+ // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
+ Transliterator* tl = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
+ (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
+ if (tl == 0)
+ status = U_MEMORY_ALLOCATION_ERROR;
+ else
+ rbts->addElement(tl, status);
+ }
+ if (U_FAILURE(status)) {
+ delete rbts;
return 0;
}
- aliasReturn = new TransliteratorAlias(ID, entry->stringArg, t, entry->intArg, entry->compoundFilter);
+ aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
}
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
- case Entry::LOCALE_RULES:
+ case TransliteratorEntry::LOCALE_RULES:
aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
(UTransDirection) entry->intArg);
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
- case Entry::RULES_FORWARD:
- case Entry::RULES_REVERSE:
+ case TransliteratorEntry::RULES_FORWARD:
+ case TransliteratorEntry::RULES_REVERSE:
// Process the rule data into a TransliteratorRuleData object,
// and possibly also into an ::id header and/or footer. Then
// we modify the registry with the parsed data and retry.
{
- TransliteratorParser parser;
+ TransliteratorParser parser(status);
// We use the file name, taken from another resource bundle
// 2-d array at static init time, as a locale language. We're
// transliterators; if it lists something that's not
// installed, we'll get an error from ResourceBundle.
aliasReturn = new TransliteratorAlias(ID, rules,
- ((entry->entryType == Entry::RULES_REVERSE) ?
+ ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
UTRANS_REVERSE : UTRANS_FORWARD));
if (aliasReturn == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return 0;
default:
- U_ASSERT(FALSE); // can't get here
- return 0;
+ UPRV_UNREACHABLE; // can't get here
}
}
U_NAMESPACE_END