git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (c) 2001-2016 International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* Date Name Description
	9	* 08/10/2001 aliu Creation.
	10	**********************************************************************
	11	*/
	12
	13	#include "unicode/utypes.h"
	14
	15	#if !UCONFIG_NO_TRANSLITERATION
	16
	17	#include "unicode/translit.h"
	18	#include "unicode/resbund.h"
	19	#include "unicode/uniset.h"
	20	#include "unicode/uscript.h"
	21	#include "rbt.h"
	22	#include "cpdtrans.h"
	23	#include "nultrans.h"
	24	#include "transreg.h"
	25	#include "rbt_data.h"
	26	#include "rbt_pars.h"
	27	#include "tridpars.h"
	28	#include "charstr.h"
	29	#include "uassert.h"
	30	#include "locutil.h"
	31
	32	// Enable the following symbol to add debugging code that tracks the
	33	// allocation, deletion, and use of Entry objects. BoundsChecker has
	34	// reported dangling pointer errors with these objects, but I have
	35	// been unable to confirm them. I suspect BoundsChecker is getting
	36	// confused with pointers going into and coming out of a UHashtable,
	37	// despite the hinting code that is designed to help it.
	38	// #define DEBUG_MEM
	39	#ifdef DEBUG_MEM
	40	#include <stdio.h>
	41	#endif
	42
	43	// UChar constants
	44	static const UChar LOCALE_SEP = 95; // '_'
	45	//static const UChar ID_SEP = 0x002D; /*-*/
	46	//static const UChar VARIANT_SEP = 0x002F; // '/'
	47
	48	// String constants
	49	static const UChar ANY[] = { 0x41, 0x6E, 0x79, 0 }; // Any
	50	static const UChar LAT[] = { 0x4C, 0x61, 0x74, 0 }; // Lat
	51
	52	// empty string
	53	#define NO_VARIANT UnicodeString()
	54
	55	// initial estimate for specDAG size
	56	#define SPECDAG_INIT_SIZE 134
	57
	58	// initial estimate for number of variant names
	59	#define VARIANT_LIST_INIT_SIZE 11
	60	#define VARIANT_LIST_MAX_SIZE 31
	61
	62	// initial estimate for availableIDs count (default estimate is 8 => multiple reallocs)
	63	#define AVAILABLE_IDS_INIT_SIZE 493
	64
	65	// initial estimate for number of targets for source "Any", "Lat"
	66	#define ANY_TARGETS_INIT_SIZE 102
	67	#define LAT_TARGETS_INIT_SIZE 23
	68
	69	/**
	70	* Resource bundle key for the RuleBasedTransliterator rule.
	71	*/
	72	//static const char RB_RULE[] = "Rule";
	73
	74	U_NAMESPACE_BEGIN
	75
	76	//------------------------------------------------------------------
	77	// Alias
	78	//------------------------------------------------------------------
	79
	80	TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
	81	const UnicodeSet* cpdFilter) :
	82	ID(),
	83	aliasesOrRules(theAliasID),
	84	transes(0),
	85	compoundFilter(cpdFilter),
	86	direction(UTRANS_FORWARD),
	87	type(TransliteratorAlias::SIMPLE) {
	88	}
	89
	90	TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
	91	const UnicodeString& idBlocks,
	92	UVector* adoptedTransliterators,
	93	const UnicodeSet* cpdFilter) :
	94	ID(theID),
	95	aliasesOrRules(idBlocks),
	96	transes(adoptedTransliterators),
	97	compoundFilter(cpdFilter),
	98	direction(UTRANS_FORWARD),
	99	type(TransliteratorAlias::COMPOUND) {
	100	}
	101
	102	TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
	103	const UnicodeString& rules,
	104	UTransDirection dir) :
	105	ID(theID),
	106	aliasesOrRules(rules),
	107	transes(0),
	108	compoundFilter(0),
	109	direction(dir),
	110	type(TransliteratorAlias::RULES) {
	111	}
	112
	113	TransliteratorAlias::~TransliteratorAlias() {
	114	delete transes;
	115	}
	116
	117
	118	Transliterator* TransliteratorAlias::create(UParseError& pe,
	119	UErrorCode& ec) {
	120	if (U_FAILURE(ec)) {
	121	return 0;
	122	}
	123	Transliterator *t = NULL;
	124	switch (type) {
	125	case SIMPLE:
	126	t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
	127	if(U_FAILURE(ec)){
	128	return 0;
	129	}
	130	if (compoundFilter != 0)
	131	t->adoptFilter((UnicodeSet*)compoundFilter->clone());
	132	break;
	133	case COMPOUND:
	134	{
	135	// the total number of transliterators in the compound is the total number of anonymous transliterators
	136	// plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
	137	// block and that each pair anonymous transliterators has an ID block between them. Then we go back
	138	// to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
	139	// marks the position where an anonymous transliterator goes) and adjust accordingly
	140	int32_t anonymousRBTs = transes->size();
	141	int32_t transCount = anonymousRBTs * 2 + 1;
	142	if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
	143	--transCount;
	144	if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
	145	--transCount;
	146	UnicodeString noIDBlock((UChar)(0xffff));
	147	noIDBlock += ((UChar)(0xffff));
	148	int32_t pos = aliasesOrRules.indexOf(noIDBlock);
	149	while (pos >= 0) {
	150	--transCount;
	151	pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
	152	}
	153
	154	UVector transliterators(ec);
	155	UnicodeString idBlock;
	156	int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
	157	while (blockSeparatorPos >= 0) {
	158	aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
	159	aliasesOrRules.remove(0, blockSeparatorPos + 1);
	160	if (!idBlock.isEmpty())
	161	transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
	162	if (!transes->isEmpty())
	163	transliterators.addElement(transes->orphanElementAt(0), ec);
	164	blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
	165	}
	166	if (!aliasesOrRules.isEmpty())
	167	transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
	168	while (!transes->isEmpty())
	169	transliterators.addElement(transes->orphanElementAt(0), ec);
	170
	171	if (U_SUCCESS(ec)) {
	172	t = new CompoundTransliterator(ID, transliterators,
	173	(compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
	174	anonymousRBTs, pe, ec);
	175	if (t == 0) {
	176	ec = U_MEMORY_ALLOCATION_ERROR;
	177	return 0;
	178	}
	179	} else {
	180	for (int32_t i = 0; i < transliterators.size(); i++)
	181	delete (Transliterator*)(transliterators.elementAt(i));
	182	}
	183	}
	184	break;
	185	case RULES:
	186	U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
	187	break;
	188	}
	189	return t;
	190	}
	191
	192	UBool TransliteratorAlias::isRuleBased() const {
	193	return type == RULES;
	194	}
	195
	196	void TransliteratorAlias::parse(TransliteratorParser& parser,
	197	UParseError& pe, UErrorCode& ec) const {
	198	U_ASSERT(type == RULES);
	199	if (U_FAILURE(ec)) {
	200	return;
	201	}
	202
	203	parser.parse(aliasesOrRules, direction, pe, ec);
	204	}
	205
	206	//----------------------------------------------------------------------
	207	// class TransliteratorSpec
	208	//----------------------------------------------------------------------
	209
	210	/**
	211	* A TransliteratorSpec is a string specifying either a source or a target. In more
	212	* general terms, it may also specify a variant, but we only use the
	213	* Spec class for sources and targets.
	214	*
	215	* A Spec may be a locale or a script. If it is a locale, it has a
	216	* fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
	217	* ssss is the script mapping of xx_YY_ZZZ. The Spec API methods
	218	* hasFallback(), next(), and reset() iterate over this fallback
	219	* sequence.
	220	*
	221	* The Spec class canonicalizes itself, so the locale is put into
	222	* canonical form, or the script is transformed from an abbreviation
	223	* to a full name.
	224	*/
	225	class TransliteratorSpec : public UMemory {
	226	public:
	227	TransliteratorSpec(const UnicodeString& spec);
	228	~TransliteratorSpec();
	229
	230	const UnicodeString& get() const;
	231	UBool hasFallback() const;
	232	const UnicodeString& next();
	233	void reset();
	234
	235	UBool isLocale() const;
	236	ResourceBundle& getBundle() const;
	237
	238	operator const UnicodeString&() const { return get(); }
	239	const UnicodeString& getTop() const { return top; }
	240
	241	private:
	242	void setupNext();
	243
	244	UnicodeString top;
	245	UnicodeString spec;
	246	UnicodeString nextSpec;
	247	UnicodeString scriptName;
	248	UBool isSpecLocale; // TRUE if spec is a locale
	249	UBool isNextLocale; // TRUE if nextSpec is a locale
	250	ResourceBundle* res;
	251
	252	TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
	253	TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
	254	};
	255
	256	TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
	257	: top(theSpec),
	258	res(0)
	259	{
	260	UErrorCode status = U_ZERO_ERROR;
	261	Locale topLoc("");
	262	LocaleUtility::initLocaleFromName(theSpec, topLoc);
	263	if (!topLoc.isBogus()) {
	264	res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status);
	265	/* test for NULL */
	266	if (res == 0) {
	267	return;
	268	}
	269	if (U_FAILURE(status) \|\| status == U_USING_DEFAULT_WARNING) {
	270	delete res;
	271	res = 0;
	272	}
	273	}
	274
	275	// Canonicalize script name -or- do locale->script mapping
	276	status = U_ZERO_ERROR;
	277	static const int32_t capacity = 10;
	278	UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
	279	int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(),
	280	script, capacity, &status);
	281	if (num > 0 && script[0] != USCRIPT_INVALID_CODE) {
	282	scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV);
	283	}
	284
	285	// Canonicalize top
	286	if (res != 0) {
	287	// Canonicalize locale name
	288	UnicodeString locStr;
	289	LocaleUtility::initNameFromLocale(topLoc, locStr);
	290	if (!locStr.isBogus()) {
	291	top = locStr;
	292	}
	293	} else if (scriptName.length() != 0) {
	294	// We are a script; use canonical name
	295	top = scriptName;
	296	}
	297
	298	// assert(spec != top);
	299	reset();
	300	}
	301
	302	TransliteratorSpec::~TransliteratorSpec() {
	303	delete res;
	304	}
	305
	306	UBool TransliteratorSpec::hasFallback() const {
	307	return nextSpec.length() != 0;
	308	}
	309
	310	void TransliteratorSpec::reset() {
	311	if (spec != top) {
	312	spec = top;
	313	isSpecLocale = (res != 0);
	314	setupNext();
	315	}
	316	}
	317
	318	void TransliteratorSpec::setupNext() {
	319	isNextLocale = FALSE;
	320	if (isSpecLocale) {
	321	nextSpec = spec;
	322	int32_t i = nextSpec.lastIndexOf(LOCALE_SEP);
	323	// If i == 0 then we have _FOO, so we fall through
	324	// to the scriptName.
	325	if (i > 0) {
	326	nextSpec.truncate(i);
	327	isNextLocale = TRUE;
	328	} else {
	329	nextSpec = scriptName; // scriptName may be empty
	330	}
	331	} else {
	332	// spec is a script, so we are at the end
	333	nextSpec.truncate(0);
	334	}
	335	}
	336
	337	// Protocol:
	338	// for(const UnicodeString& s(spec.get());
	339	// spec.hasFallback(); s(spec.next())) { ...
	340
	341	const UnicodeString& TransliteratorSpec::next() {
	342	spec = nextSpec;
	343	isSpecLocale = isNextLocale;
	344	setupNext();
	345	return spec;
	346	}
	347
	348	const UnicodeString& TransliteratorSpec::get() const {
	349	return spec;
	350	}
	351
	352	UBool TransliteratorSpec::isLocale() const {
	353	return isSpecLocale;
	354	}
	355
	356	ResourceBundle& TransliteratorSpec::getBundle() const {
	357	return *res;
	358	}
	359
	360	//----------------------------------------------------------------------
	361
	362	#ifdef DEBUG_MEM
	363
	364	// Vector of Entry pointers currently in use
	365	static UVector* DEBUG_entries = NULL;
	366
	367	static void DEBUG_setup() {
	368	if (DEBUG_entries == NULL) {
	369	UErrorCode ec = U_ZERO_ERROR;
	370	DEBUG_entries = new UVector(ec);
	371	}
	372	}
	373
	374	// Caller must call DEBUG_setup first. Return index of given Entry,
	375	// if it is in use (not deleted yet), or -1 if not found.
	376	static int DEBUG_findEntry(TransliteratorEntry* e) {
	377	for (int i=0; i<DEBUG_entries->size(); ++i) {
	378	if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
	379	return i;
	380	}
	381	}
	382	return -1;
	383	}
	384
	385	// Track object creation
	386	static void DEBUG_newEntry(TransliteratorEntry* e) {
	387	DEBUG_setup();
	388	if (DEBUG_findEntry(e) >= 0) {
	389	// This should really never happen unless the heap is broken
	390	printf("ERROR DEBUG_newEntry duplicate new pointer %08X\n", e);
	391	return;
	392	}
	393	UErrorCode ec = U_ZERO_ERROR;
	394	DEBUG_entries->addElement(e, ec);
	395	}
	396
	397	// Track object deletion
	398	static void DEBUG_delEntry(TransliteratorEntry* e) {
	399	DEBUG_setup();
	400	int i = DEBUG_findEntry(e);
	401	if (i < 0) {
	402	printf("ERROR DEBUG_delEntry possible double deletion %08X\n", e);
	403	return;
	404	}
	405	DEBUG_entries->removeElementAt(i);
	406	}
	407
	408	// Track object usage
	409	static void DEBUG_useEntry(TransliteratorEntry* e) {
	410	if (e == NULL) return;
	411	DEBUG_setup();
	412	int i = DEBUG_findEntry(e);
	413	if (i < 0) {
	414	printf("ERROR DEBUG_useEntry possible dangling pointer %08X\n", e);
	415	}
	416	}
	417
	418	#else
	419	// If we're not debugging then make these macros into NOPs
	420	#define DEBUG_newEntry(x)
	421	#define DEBUG_delEntry(x)
	422	#define DEBUG_useEntry(x)
	423	#endif
	424
	425	//----------------------------------------------------------------------
	426	// class Entry
	427	//----------------------------------------------------------------------
	428
	429	/**
	430	* The Entry object stores objects of different types and
	431	* singleton objects as placeholders for rule-based transliterators to
	432	* be built as needed. Instances of this struct can be placeholders,
	433	* can represent prototype transliterators to be cloned, or can
	434	* represent TransliteratorData objects. We don't support storing
	435	* classes in the registry because we don't have the rtti infrastructure
	436	* for it. We could easily add this if there is a need for it in the
	437	* future.
	438	*/
	439	class TransliteratorEntry : public UMemory {
	440	public:
	441	enum Type {
	442	RULES_FORWARD,
	443	RULES_REVERSE,
	444	LOCALE_RULES,
	445	PROTOTYPE,
	446	RBT_DATA,
	447	COMPOUND_RBT,
	448	ALIAS,
	449	FACTORY,
	450	NONE // Only used for uninitialized entries
	451	} entryType;
	452	// NOTE: stringArg cannot go inside the union because
	453	// it has a copy constructor
	454	UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT
	455	int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES
	456	UnicodeSet* compoundFilter; // For COMPOUND_RBT
	457	union {
	458	Transliterator* prototype; // For PROTOTYPE
	459	TransliterationRuleData* data; // For RBT_DATA
	460	UVector* dataVector; // For COMPOUND_RBT
	461	struct {
	462	Transliterator::Factory function;
	463	Transliterator::Token context;
	464	} factory; // For FACTORY
	465	} u;
	466	TransliteratorEntry();
	467	~TransliteratorEntry();
	468	void adoptPrototype(Transliterator* adopted);
	469	void setFactory(Transliterator::Factory factory,
	470	Transliterator::Token context);
	471
	472	private:
	473
	474	TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
	475	TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
	476	};
	477
	478	TransliteratorEntry::TransliteratorEntry() {
	479	u.prototype = 0;
	480	compoundFilter = NULL;
	481	entryType = NONE;
	482	DEBUG_newEntry(this);
	483	}
	484
	485	TransliteratorEntry::~TransliteratorEntry() {
	486	DEBUG_delEntry(this);
	487	if (entryType == PROTOTYPE) {
	488	delete u.prototype;
	489	} else if (entryType == RBT_DATA) {
	490	// The data object is shared between instances of RBT. The
	491	// entry object owns it. It should only be deleted when the
	492	// transliterator component is being cleaned up. Doing so
	493	// invalidates any RBTs that the user has instantiated.
	494	delete u.data;
	495	} else if (entryType == COMPOUND_RBT) {
	496	while (u.dataVector != NULL && !u.dataVector->isEmpty())
	497	delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
	498	delete u.dataVector;
	499	}
	500	delete compoundFilter;
	501	}
	502
	503	void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
	504	if (entryType == PROTOTYPE) {
	505	delete u.prototype;
	506	}
	507	entryType = PROTOTYPE;
	508	u.prototype = adopted;
	509	}
	510
	511	void TransliteratorEntry::setFactory(Transliterator::Factory factory,
	512	Transliterator::Token context) {
	513	if (entryType == PROTOTYPE) {
	514	delete u.prototype;
	515	}
	516	entryType = FACTORY;
	517	u.factory.function = factory;
	518	u.factory.context = context;
	519	}
	520
	521	// UObjectDeleter for Hashtable::setValueDeleter
	522	U_CDECL_BEGIN
	523	static void U_CALLCONV
	524	deleteEntry(void* obj) {
	525	delete (TransliteratorEntry*) obj;
	526	}
	527	U_CDECL_END
	528
	529	//----------------------------------------------------------------------
	530	// class TransliteratorRegistry: Basic public API
	531	//----------------------------------------------------------------------
	532
	533	TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
	534	registry(TRUE, status),
	535	specDAG(TRUE, SPECDAG_INIT_SIZE, status),
	536	variantList(VARIANT_LIST_INIT_SIZE, status),
	537	availableIDs(AVAILABLE_IDS_INIT_SIZE, status)
	538	{
	539	registry.setValueDeleter(deleteEntry);
	540	variantList.setDeleter(uprv_deleteUObject);
	541	variantList.setComparer(uhash_compareCaselessUnicodeString);
	542	UnicodeString *emptyString = new UnicodeString();
	543	if (emptyString != NULL) {
	544	variantList.addElement(emptyString, status);
	545	}
	546	availableIDs.setDeleter(uprv_deleteUObject);
	547	availableIDs.setComparer(uhash_compareCaselessUnicodeString);
	548	specDAG.setValueDeleter(uhash_deleteHashtable);
	549	}
	550
	551	TransliteratorRegistry::~TransliteratorRegistry() {
	552	// Through the magic of C++, everything cleans itself up
	553	}
	554
	555	Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
	556	TransliteratorAlias*& aliasReturn,
	557	UErrorCode& status) {
	558	U_ASSERT(aliasReturn == NULL);
	559	TransliteratorEntry *entry = find(ID);
	560	return (entry == 0) ? 0
	561	: instantiateEntry(ID, entry, aliasReturn, status);
	562	}
	563
	564	Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
	565	TransliteratorParser& parser,
	566	TransliteratorAlias*& aliasReturn,
	567	UErrorCode& status) {
	568	U_ASSERT(aliasReturn == NULL);
	569	TransliteratorEntry *entry = find(ID);
	570
	571	if (entry == 0) {
	572	// We get to this point if there are two threads, one of which
	573	// is instantiating an ID, and another of which is removing
	574	// the same ID from the registry, and the timing is just right.
	575	return 0;
	576	}
	577
	578	// The usage model for the caller is that they will first call
	579	// reg->get() inside the mutex, they'll get back an alias, they call
	580	// alias->isRuleBased(), and if they get TRUE, they call alias->parse()
	581	// outside the mutex, then reg->reget() inside the mutex again. A real
	582	// mess, but it gets things working for ICU 3.0. [alan].
	583
	584	// Note: It's possible that in between the caller calling
	585	// alias->parse() and reg->reget(), that another thread will have
	586	// called reg->reget(), and the entry will already have been fixed up.
	587	// We have to detect this so we don't stomp over existing entry
	588	// data members and potentially leak memory (u.data and compoundFilter).
	589
	590	if (entry->entryType == TransliteratorEntry::RULES_FORWARD \|\|
	591	entry->entryType == TransliteratorEntry::RULES_REVERSE \|\|
	592	entry->entryType == TransliteratorEntry::LOCALE_RULES) {
	593
	594	if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
	595	entry->u.data = 0;
	596	entry->entryType = TransliteratorEntry::ALIAS;
	597	entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
	598	}
	599	else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
	600	entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
	601	entry->entryType = TransliteratorEntry::RBT_DATA;
	602	}
	603	else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
	604	entry->stringArg = (UnicodeString)(parser.idBlockVector.elementAt(0));
	605	entry->compoundFilter = parser.orphanCompoundFilter();
	606	entry->entryType = TransliteratorEntry::ALIAS;
	607	}
	608	else {
	609	entry->entryType = TransliteratorEntry::COMPOUND_RBT;
	610	entry->compoundFilter = parser.orphanCompoundFilter();
	611	entry->u.dataVector = new UVector(status);
	612	entry->stringArg.remove();
	613
	614	int32_t limit = parser.idBlockVector.size();
	615	if (parser.dataVector.size() > limit)
	616	limit = parser.dataVector.size();
	617
	618	for (int32_t i = 0; i < limit; i++) {
	619	if (i < parser.idBlockVector.size()) {
	620	UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
	621	if (!idBlock->isEmpty())
	622	entry->stringArg += *idBlock;
	623	}
	624	if (!parser.dataVector.isEmpty()) {
	625	TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
	626	entry->u.dataVector->addElement(data, status);
	627	entry->stringArg += (UChar)0xffff; // use U+FFFF to mark position of RBTs in ID block
	628	}
	629	}
	630	}
	631	}
	632
	633	Transliterator *t =
	634	instantiateEntry(ID, entry, aliasReturn, status);
	635	return t;
	636	}
	637
	638	void TransliteratorRegistry::put(Transliterator* adoptedProto,
	639	UBool visible,
	640	UErrorCode& ec)
	641	{
	642	TransliteratorEntry *entry = new TransliteratorEntry();
	643	if (entry == NULL) {
	644	ec = U_MEMORY_ALLOCATION_ERROR;
	645	return;
	646	}
	647	entry->adoptPrototype(adoptedProto);
	648	registerEntry(adoptedProto->getID(), entry, visible);
	649	}
	650
	651	void TransliteratorRegistry::put(const UnicodeString& ID,
	652	Transliterator::Factory factory,
	653	Transliterator::Token context,
	654	UBool visible,
	655	UErrorCode& ec) {
	656	TransliteratorEntry *entry = new TransliteratorEntry();
	657	if (entry == NULL) {
	658	ec = U_MEMORY_ALLOCATION_ERROR;
	659	return;
	660	}
	661	entry->setFactory(factory, context);
	662	registerEntry(ID, entry, visible);
	663	}
	664
	665	void TransliteratorRegistry::put(const UnicodeString& ID,
	666	const UnicodeString& resourceName,
	667	UTransDirection dir,
	668	UBool readonlyResourceAlias,
	669	UBool visible,
	670	UErrorCode& ec) {
	671	TransliteratorEntry *entry = new TransliteratorEntry();
	672	if (entry == NULL) {
	673	ec = U_MEMORY_ALLOCATION_ERROR;
	674	return;
	675	}
	676	entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD
	677	: TransliteratorEntry::RULES_REVERSE;
	678	if (readonlyResourceAlias) {
	679	entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1);
	680	}
	681	else {
	682	entry->stringArg = resourceName;
	683	}
	684	registerEntry(ID, entry, visible);
	685	}
	686
	687	void TransliteratorRegistry::put(const UnicodeString& ID,
	688	const UnicodeString& alias,
	689	UBool readonlyAliasAlias,
	690	UBool visible,
	691	UErrorCode& /ec/) {
	692	TransliteratorEntry *entry = new TransliteratorEntry();
	693	// Null pointer check
	694	if (entry != NULL) {
	695	entry->entryType = TransliteratorEntry::ALIAS;
	696	if (readonlyAliasAlias) {
	697	entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);
	698	}
	699	else {
	700	entry->stringArg = alias;
	701	}
	702	registerEntry(ID, entry, visible);
	703	}
	704	}
	705
	706	void TransliteratorRegistry::remove(const UnicodeString& ID) {
	707	UnicodeString source, target, variant;
	708	UBool sawSource;
	709	TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
	710	// Only need to do this if ID.indexOf('-') < 0
	711	UnicodeString id;
	712	TransliteratorIDParser::STVtoID(source, target, variant, id);
	713	registry.remove(id);
	714	removeSTV(source, target, variant);
	715	availableIDs.removeElement((void*) &id);
	716	}
	717
	718	//----------------------------------------------------------------------
	719	// class TransliteratorRegistry: Public ID and spec management
	720	//----------------------------------------------------------------------
	721
	722	/**
	723	* == OBSOLETE - remove in ICU 3.4 ==
	724	* Return the number of IDs currently registered with the system.
	725	* To retrieve the actual IDs, call getAvailableID(i) with
	726	* i from 0 to countAvailableIDs() - 1.
	727	*/
	728	int32_t TransliteratorRegistry::countAvailableIDs(void) const {
	729	return availableIDs.size();
	730	}
	731
	732	/**
	733	* == OBSOLETE - remove in ICU 3.4 ==
	734	* Return the index-th available ID. index must be between 0
	735	* and countAvailableIDs() - 1, inclusive. If index is out of
	736	* range, the result of getAvailableID(0) is returned.
	737	*/
	738	const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
	739	if (index < 0 \|\| index >= availableIDs.size()) {
	740	index = 0;
	741	}
	742	return (const UnicodeString) availableIDs[index];
	743	}
	744
	745	StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
	746	return new Enumeration(*this);
	747	}
	748
	749	int32_t TransliteratorRegistry::countAvailableSources(void) const {
	750	return specDAG.count();
	751	}
	752
	753	UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
	754	UnicodeString& result) const {
	755	int32_t pos = UHASH_FIRST;
	756	const UHashElement *e = 0;
	757	while (index-- >= 0) {
	758	e = specDAG.nextElement(pos);
	759	if (e == 0) {
	760	break;
	761	}
	762	}
	763	if (e == 0) {
	764	result.truncate(0);
	765	} else {
	766	result = (UnicodeString) e->key.pointer;
	767	}
	768	return result;
	769	}
	770
	771	int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const {
	772	Hashtable targets = (Hashtable) specDAG.get(source);
	773	return (targets == 0) ? 0 : targets->count();
	774	}
	775
	776	UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index,
	777	const UnicodeString& source,
	778	UnicodeString& result) const {
	779	Hashtable targets = (Hashtable) specDAG.get(source);
	780	if (targets == 0) {
	781	result.truncate(0); // invalid source
	782	return result;
	783	}
	784	int32_t pos = UHASH_FIRST;
	785	const UHashElement *e = 0;
	786	while (index-- >= 0) {
	787	e = targets->nextElement(pos);
	788	if (e == 0) {
	789	break;
	790	}
	791	}
	792	if (e == 0) {
	793	result.truncate(0); // invalid index
	794	} else {
	795	result = (UnicodeString) e->key.pointer;
	796	}
	797	return result;
	798	}
	799
	800	int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source,
	801	const UnicodeString& target) const {
	802	Hashtable targets = (Hashtable) specDAG.get(source);
	803	if (targets == 0) {
	804	return 0;
	805	}
	806	int32_t varMask = targets->geti(target);
	807	int32_t varCount = 0;
	808	while (varMask > 0) {
	809	if (varMask & 1) {
	810	varCount++;
	811	}
	812	varMask >>= 1;
	813	}
	814	return varCount;
	815	}
	816
	817	UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
	818	const UnicodeString& source,
	819	const UnicodeString& target,
	820	UnicodeString& result) const {
	821	Hashtable targets = (Hashtable) specDAG.get(source);
	822	if (targets == 0) {
	823	result.truncate(0); // invalid source
	824	return result;
	825	}
	826	int32_t varMask = targets->geti(target);
	827	int32_t varCount = 0;
	828	int32_t varListIndex = 0;
	829	while (varMask > 0) {
	830	if (varMask & 1) {
	831	if (varCount == index) {
	832	UnicodeString v = (UnicodeString) variantList.elementAt(varListIndex);
	833	if (v != NULL) {
	834	result = *v;
	835	return result;
	836	}
	837	break;
	838	}
	839	varCount++;
	840	}
	841	varMask >>= 1;
	842	varListIndex++;
	843	}
	844	result.truncate(0); // invalid target or index
	845	return result;
	846	}
	847
	848	//----------------------------------------------------------------------
	849	// class TransliteratorRegistry::Enumeration
	850	//----------------------------------------------------------------------
	851
	852	TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg) :
	853	index(0), reg(_reg) {
	854	}
	855
	856	TransliteratorRegistry::Enumeration::~Enumeration() {
	857	}
	858
	859	int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /status/) const {
	860	return reg.availableIDs.size();
	861	}
	862
	863	const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
	864	// This is sloppy but safe -- if we get out of sync with the underlying
	865	// registry, we will still return legal strings, but they might not
	866	// correspond to the snapshot at construction time. So there could be
	867	// duplicate IDs or omitted IDs if insertions or deletions occur in one
	868	// thread while another is iterating. To be more rigorous, add a timestamp,
	869	// which is incremented with any modification, and validate this iterator
	870	// against the timestamp at construction time. This probably isn't worth
	871	// doing as long as there is some possibility of removing this code in favor
	872	// of some new code based on Doug's service framework.
	873	if (U_FAILURE(status)) {
	874	return NULL;
	875	}
	876	int32_t n = reg.availableIDs.size();
	877	if (index > n) {
	878	status = U_ENUM_OUT_OF_SYNC_ERROR;
	879	}
	880	// index == n is okay -- this means we've reached the end
	881	if (index < n) {
	882	// Copy the string! This avoids lifetime problems.
	883	unistr = (const UnicodeString)reg.availableIDs[index++];
	884	return &unistr;
	885	} else {
	886	return NULL;
	887	}
	888	}
	889
	890	void TransliteratorRegistry::Enumeration::reset(UErrorCode& /status/) {
	891	index = 0;
	892	}
	893
	894	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
	895
	896	//----------------------------------------------------------------------
	897	// class TransliteratorRegistry: internal
	898	//----------------------------------------------------------------------
	899
	900	/**
	901	* Convenience method. Calls 6-arg registerEntry().
	902	*/
	903	void TransliteratorRegistry::registerEntry(const UnicodeString& source,
	904	const UnicodeString& target,
	905	const UnicodeString& variant,
	906	TransliteratorEntry* adopted,
	907	UBool visible) {
	908	UnicodeString ID;
	909	UnicodeString s(source);
	910	if (s.length() == 0) {
	911	s.setTo(TRUE, ANY, 3);
	912	}
	913	TransliteratorIDParser::STVtoID(source, target, variant, ID);
	914	registerEntry(ID, s, target, variant, adopted, visible);
	915	}
	916
	917	/**
	918	* Convenience method. Calls 6-arg registerEntry().
	919	*/
	920	void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
	921	TransliteratorEntry* adopted,
	922	UBool visible) {
	923	UnicodeString source, target, variant;
	924	UBool sawSource;
	925	TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
	926	// Only need to do this if ID.indexOf('-') < 0
	927	UnicodeString id;
	928	TransliteratorIDParser::STVtoID(source, target, variant, id);
	929	registerEntry(id, source, target, variant, adopted, visible);
	930	}
	931
	932	/**
	933	* Register an entry object (adopted) with the given ID, source,
	934	* target, and variant strings.
	935	*/
	936	void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
	937	const UnicodeString& source,
	938	const UnicodeString& target,
	939	const UnicodeString& variant,
	940	TransliteratorEntry* adopted,
	941	UBool visible) {
	942	UErrorCode status = U_ZERO_ERROR;
	943	registry.put(ID, adopted, status);
	944	if (visible) {
	945	registerSTV(source, target, variant);
	946	if (!availableIDs.contains((void*) &ID)) {
	947	UnicodeString newID = (UnicodeString )ID.clone();
	948	// Check to make sure newID was created.
	949	if (newID != NULL) {
	950	// NUL-terminate the ID string
	951	newID->getTerminatedBuffer();
	952	availableIDs.addElement(newID, status);
	953	}
	954	}
	955	} else {
	956	removeSTV(source, target, variant);
	957	availableIDs.removeElement((void*) &ID);
	958	}
	959	}
	960
	961	/**
	962	* Register a source-target/variant in the specDAG. Variant may be
	963	* empty, but source and target must not be.
	964	*/
	965	void TransliteratorRegistry::registerSTV(const UnicodeString& source,
	966	const UnicodeString& target,
	967	const UnicodeString& variant) {
	968	// assert(source.length() > 0);
	969	// assert(target.length() > 0);
	970	UErrorCode status = U_ZERO_ERROR;
	971	Hashtable targets = (Hashtable) specDAG.get(source);
	972	if (targets == 0) {
	973	int32_t size = 3;
	974	if (source.compare(ANY,3) == 0) {
	975	size = ANY_TARGETS_INIT_SIZE;
	976	} else if (source.compare(LAT,3) == 0) {
	977	size = LAT_TARGETS_INIT_SIZE;
	978	}
	979	targets = new Hashtable(TRUE, size, status);
	980	if (U_FAILURE(status) \|\| targets == NULL) {
	981	return;
	982	}
	983	specDAG.put(source, targets, status);
	984	}
	985	int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
	986	if (variantListIndex < 0) {
	987	if (variantList.size() >= VARIANT_LIST_MAX_SIZE) {
	988	// can't handle any more variants
	989	return;
	990	}
	991	UnicodeString *variantEntry = new UnicodeString(variant);
	992	if (variantEntry != NULL) {
	993	variantList.addElement(variantEntry, status);
	994	if (U_SUCCESS(status)) {
	995	variantListIndex = variantList.size() - 1;
	996	}
	997	}
	998	if (variantListIndex < 0) {
	999	return;
	1000	}
	1001	}
	1002	int32_t addMask = 1 << variantListIndex;
	1003	int32_t varMask = targets->geti(target);
	1004	targets->puti(target, varMask \| addMask, status);
	1005	}
	1006
	1007	/**
	1008	* Remove a source-target/variant from the specDAG.
	1009	*/
	1010	void TransliteratorRegistry::removeSTV(const UnicodeString& source,
	1011	const UnicodeString& target,
	1012	const UnicodeString& variant) {
	1013	// assert(source.length() > 0);
	1014	// assert(target.length() > 0);
	1015	UErrorCode status = U_ZERO_ERROR;
	1016	Hashtable targets = (Hashtable) specDAG.get(source);
	1017	if (targets == NULL) {
	1018	return; // should never happen for valid s-t/v
	1019	}
	1020	int32_t varMask = targets->geti(target);
	1021	if (varMask == 0) {
	1022	return; // should never happen for valid s-t/v
	1023	}
	1024	int32_t variantListIndex = variantList.indexOf((void*) &variant, 0);
	1025	if (variantListIndex < 0) {
	1026	return; // should never happen for valid s-t/v
	1027	}
	1028	int32_t remMask = 1 << variantListIndex;
	1029	varMask &= (~remMask);
	1030	if (varMask != 0) {
	1031	targets->puti(target, varMask, status);
	1032	} else {
	1033	targets->remove(target); // should delete variants
	1034	if (targets->count() == 0) {
	1035	specDAG.remove(source); // should delete targetss
	1036	}
	1037	}
	1038	}
	1039
	1040	/**
	1041	* Attempt to find a source-target/variant in the dynamic registry
	1042	* store. Return 0 on failure.
	1043	*
	1044	* Caller does NOT own returned object.
	1045	*/
	1046	TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
	1047	const TransliteratorSpec& trg,
	1048	const UnicodeString& variant) const {
	1049	UnicodeString ID;
	1050	TransliteratorIDParser::STVtoID(src, trg, variant, ID);
	1051	TransliteratorEntry e = (TransliteratorEntry) registry.get(ID);
	1052	DEBUG_useEntry(e);
	1053	return e;
	1054	}
	1055
	1056	/**
	1057	* Attempt to find a source-target/variant in the static locale
	1058	* resource store. Do not perform fallback. Return 0 on failure.
	1059	*
	1060	* On success, create a new entry object, register it in the dynamic
	1061	* store, and return a pointer to it, but do not make it public --
	1062	* just because someone requested something, we do not expand the
	1063	* available ID list (or spec DAG).
	1064	*
	1065	* Caller does NOT own returned object.
	1066	*/
	1067	TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
	1068	const TransliteratorSpec& trg,
	1069	const UnicodeString& variant) {
	1070	TransliteratorEntry* entry = 0;
	1071	if (src.isLocale()) {
	1072	entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
	1073	} else if (trg.isLocale()) {
	1074	entry = findInBundle(trg, src, variant, UTRANS_REVERSE);
	1075	}
	1076
	1077	// If we found an entry, store it in the Hashtable for next
	1078	// time.
	1079	if (entry != 0) {
	1080	registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE);
	1081	}
	1082
	1083	return entry;
	1084	}
	1085
	1086	// As of 2.0, resource bundle keys cannot contain '_'
	1087	static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo"
	1088
	1089	static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom"
	1090
	1091	static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate"
	1092
	1093	/**
	1094	* Attempt to find an entry in a single resource bundle. This is
	1095	* a one-sided lookup. findInStaticStore() performs up to two such
	1096	* lookups, one for the source, and one for the target.
	1097	*
	1098	* Do not perform fallback. Return 0 on failure.
	1099	*
	1100	* On success, create a new Entry object, populate it, and return it.
	1101	* The caller owns the returned object.
	1102	*/
	1103	TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
	1104	const TransliteratorSpec& specToFind,
	1105	const UnicodeString& variant,
	1106	UTransDirection direction)
	1107	{
	1108	UnicodeString utag;
	1109	UnicodeString resStr;
	1110	int32_t pass;
	1111
	1112	for (pass=0; pass<2; ++pass) {
	1113	utag.truncate(0);
	1114	// First try either TransliteratorTo_xxx or
	1115	// TransliterateFrom_xxx, then try the bidirectional
	1116	// Transliterate_xxx. This precedence order is arbitrary
	1117	// but must be consistent and documented.
	1118	if (pass == 0) {
	1119	utag.append(direction == UTRANS_FORWARD ?
	1120	TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
	1121	} else {
	1122	utag.append(TRANSLITERATE, -1);
	1123	}
	1124	UnicodeString s(specToFind.get());
	1125	utag.append(s.toUpper(""));
	1126	UErrorCode status = U_ZERO_ERROR;
	1127	ResourceBundle subres(specToOpen.getBundle().get(
	1128	CharString().appendInvariantChars(utag, status).data(), status));
	1129	if (U_FAILURE(status) \|\| status == U_USING_DEFAULT_WARNING) {
	1130	continue;
	1131	}
	1132
	1133	s.truncate(0);
	1134	if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
	1135	continue;
	1136	}
	1137
	1138	if (variant.length() != 0) {
	1139	status = U_ZERO_ERROR;
	1140	resStr = subres.getStringEx(
	1141	CharString().appendInvariantChars(variant, status).data(), status);
	1142	if (U_SUCCESS(status)) {
	1143	// Exit loop successfully
	1144	break;
	1145	}
	1146	} else {
	1147	// Variant is empty, which means match the first variant listed.
	1148	status = U_ZERO_ERROR;
	1149	resStr = subres.getStringEx(1, status);
	1150	if (U_SUCCESS(status)) {
	1151	// Exit loop successfully
	1152	break;
	1153	}
	1154	}
	1155	}
	1156
	1157	if (pass==2) {
	1158	// Failed
	1159	return NULL;
	1160	}
	1161
	1162	// We have succeeded in loading a string from the locale
	1163	// resources. Create a new registry entry to hold it and return it.
	1164	TransliteratorEntry *entry = new TransliteratorEntry();
	1165	if (entry != 0) {
	1166	// The direction is always forward for the
	1167	// TransliterateTo_xxx and TransliterateFrom_xxx
	1168	// items; those are unidirectional forward rules.
	1169	// For the bidirectional Transliterate_xxx items,
	1170	// the direction is the value passed in to this
	1171	// function.
	1172	int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
	1173	entry->entryType = TransliteratorEntry::LOCALE_RULES;
	1174	entry->stringArg = resStr;
	1175	entry->intArg = dir;
	1176	}
	1177
	1178	return entry;
	1179	}
	1180
	1181	/**
	1182	* Convenience method. Calls 3-arg find().
	1183	*/
	1184	TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
	1185	UnicodeString source, target, variant;
	1186	UBool sawSource;
	1187	TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
	1188	return find(source, target, variant);
	1189	}
	1190
	1191	/**
	1192	* Top-level find method. Attempt to find a source-target/variant in
	1193	* either the dynamic or the static (locale resource) store. Perform
	1194	* fallback.
	1195	*
	1196	* Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
	1197	*
	1198	* ss_SS_SSS-tt_TT_TTT/v -- in hashtable
	1199	* ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
	1200	*
	1201	* repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
	1202	*
	1203	* ss_SS_SSS-t/ *
	1204	* ss_SS-t/ *
	1205	* ss-t/ *
	1206	* sscript-t/ *
	1207	*
	1208	* Here * matches the first variant listed.
	1209	*
	1210	* Caller does NOT own returned object. Return 0 on failure.
	1211	*/
	1212	TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
	1213	UnicodeString& target,
	1214	UnicodeString& variant) {
	1215
	1216	TransliteratorSpec src(source);
	1217	TransliteratorSpec trg(target);
	1218	TransliteratorEntry* entry;
	1219
	1220	// Seek exact match in hashtable. Temporary fix for ICU 4.6.
	1221	// TODO: The general logic for finding a matching transliterator needs to be reviewed.
	1222	// ICU ticket #8089
	1223	UnicodeString ID;
	1224	TransliteratorIDParser::STVtoID(source, target, variant, ID);
	1225	entry = (TransliteratorEntry*) registry.get(ID);
	1226	if (entry != 0) {
	1227	// std::string ss;
	1228	// std::cout << ID.toUTF8String(ss) << std::endl;
	1229	return entry;
	1230	}
	1231
	1232	if (variant.length() != 0) {
	1233
	1234	// Seek exact match in hashtable
	1235	entry = findInDynamicStore(src, trg, variant);
	1236	if (entry != 0) {
	1237	return entry;
	1238	}
	1239
	1240	// Seek exact match in locale resources
	1241	entry = findInStaticStore(src, trg, variant);
	1242	if (entry != 0) {
	1243	return entry;
	1244	}
	1245	}
	1246
	1247	for (;;) {
	1248	src.reset();
	1249	for (;;) {
	1250	// Seek match in hashtable
	1251	entry = findInDynamicStore(src, trg, NO_VARIANT);
	1252	if (entry != 0) {
	1253	return entry;
	1254	}
	1255
	1256	// Seek match in locale resources
	1257	entry = findInStaticStore(src, trg, NO_VARIANT);
	1258	if (entry != 0) {
	1259	return entry;
	1260	}
	1261	if (!src.hasFallback()) {
	1262	break;
	1263	}
	1264	src.next();
	1265	}
	1266	if (!trg.hasFallback()) {
	1267	break;
	1268	}
	1269	trg.next();
	1270	}
	1271
	1272	return 0;
	1273	}
	1274
	1275	/**
	1276	* Given an Entry object, instantiate it. Caller owns result. Return
	1277	* 0 on failure.
	1278	*
	1279	* Return a non-empty aliasReturn value if the ID points to an alias.
	1280	* We cannot instantiate it ourselves because the alias may contain
	1281	* filters or compounds, which we do not understand. Caller should
	1282	* make aliasReturn empty before calling.
	1283	*
	1284	* The entry object is assumed to reside in the dynamic store. It may be
	1285	* modified.
	1286	*/
	1287	Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
	1288	TransliteratorEntry *entry,
	1289	TransliteratorAlias* &aliasReturn,
	1290	UErrorCode& status) {
	1291	Transliterator *t = 0;
	1292	U_ASSERT(aliasReturn == 0);
	1293
	1294	switch (entry->entryType) {
	1295	case TransliteratorEntry::RBT_DATA:
	1296	t = new RuleBasedTransliterator(ID, entry->u.data);
	1297	if (t == 0) {
	1298	status = U_MEMORY_ALLOCATION_ERROR;
	1299	}
	1300	return t;
	1301	case TransliteratorEntry::PROTOTYPE:
	1302	t = entry->u.prototype->clone();
	1303	if (t == 0) {
	1304	status = U_MEMORY_ALLOCATION_ERROR;
	1305	}
	1306	return t;
	1307	case TransliteratorEntry::ALIAS:
	1308	aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
	1309	if (aliasReturn == 0) {
	1310	status = U_MEMORY_ALLOCATION_ERROR;
	1311	}
	1312	return 0;
	1313	case TransliteratorEntry::FACTORY:
	1314	t = entry->u.factory.function(ID, entry->u.factory.context);
	1315	if (t == 0) {
	1316	status = U_MEMORY_ALLOCATION_ERROR;
	1317	}
	1318	return t;
	1319	case TransliteratorEntry::COMPOUND_RBT:
	1320	{
	1321	UVector* rbts = new UVector(entry->u.dataVector->size(), status);
	1322	// Check for null pointer
	1323	if (rbts == NULL) {
	1324	status = U_MEMORY_ALLOCATION_ERROR;
	1325	return NULL;
	1326	}
	1327	int32_t passNumber = 1;
	1328	for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
	1329	// TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
	1330	Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
	1331	(TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
	1332	if (t == 0)
	1333	status = U_MEMORY_ALLOCATION_ERROR;
	1334	else
	1335	rbts->addElement(t, status);
	1336	}
	1337	if (U_FAILURE(status)) {
	1338	delete rbts;
	1339	return 0;
	1340	}
	1341	aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
	1342	}
	1343	if (aliasReturn == 0) {
	1344	status = U_MEMORY_ALLOCATION_ERROR;
	1345	}
	1346	return 0;
	1347	case TransliteratorEntry::LOCALE_RULES:
	1348	aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
	1349	(UTransDirection) entry->intArg);
	1350	if (aliasReturn == 0) {
	1351	status = U_MEMORY_ALLOCATION_ERROR;
	1352	}
	1353	return 0;
	1354	case TransliteratorEntry::RULES_FORWARD:
	1355	case TransliteratorEntry::RULES_REVERSE:
	1356	// Process the rule data into a TransliteratorRuleData object,
	1357	// and possibly also into an ::id header and/or footer. Then
	1358	// we modify the registry with the parsed data and retry.
	1359	{
	1360	TransliteratorParser parser(status);
	1361
	1362	// We use the file name, taken from another resource bundle
	1363	// 2-d array at static init time, as a locale language. We're
	1364	// just using the locale mechanism to map through to a file
	1365	// name; this in no way represents an actual locale.
	1366	//CharString ch(entry->stringArg);
	1367	//UResourceBundle *bundle = ures_openDirect(0, ch, &status);
	1368	UnicodeString rules = entry->stringArg;
	1369	//ures_close(bundle);
	1370
	1371	//if (U_FAILURE(status)) {
	1372	// We have a failure of some kind. Remove the ID from the
	1373	// registry so we don't keep trying. NOTE: This will throw off
	1374	// anyone who is, at the moment, trying to iterate over the
	1375	// available IDs. That's acceptable since we should never
	1376	// really get here except under installation, configuration,
	1377	// or unrecoverable run time memory failures.
	1378	// remove(ID);
	1379	//} else {
	1380
	1381	// If the status indicates a failure, then we don't have any
	1382	// rules -- there is probably an installation error. The list
	1383	// in the root locale should correspond to all the installed
	1384	// transliterators; if it lists something that's not
	1385	// installed, we'll get an error from ResourceBundle.
	1386	aliasReturn = new TransliteratorAlias(ID, rules,
	1387	((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
	1388	UTRANS_REVERSE : UTRANS_FORWARD));
	1389	if (aliasReturn == 0) {
	1390	status = U_MEMORY_ALLOCATION_ERROR;
	1391	}
	1392	//}
	1393	}
	1394	return 0;
	1395	default:
	1396	U_ASSERT(FALSE); // can't get here
	1397	return 0;
	1398	}
	1399	}
	1400	U_NAMESPACE_END
	1401
	1402	#endif /* #if !UCONFIG_NO_TRANSLITERATION */
	1403
	1404	//eof