2 **********************************************************************
3 * Copyright (c) 2001-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 08/10/2001 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
15 #include "unicode/translit.h"
16 #include "unicode/resbund.h"
17 #include "unicode/uniset.h"
18 #include "unicode/uscript.h"
30 // Enable the following symbol to add debugging code that tracks the
31 // allocation, deletion, and use of Entry objects. BoundsChecker has
32 // reported dangling pointer errors with these objects, but I have
33 // been unable to confirm them. I suspect BoundsChecker is getting
34 // confused with pointers going into and coming out of a UHashtable,
35 // despite the hinting code that is designed to help it.
42 static const UChar LOCALE_SEP
= 95; // '_'
43 //static const UChar ID_SEP = 0x002D; /*-*/
44 //static const UChar VARIANT_SEP = 0x002F; // '/'
47 static const UChar NO_VARIANT
[] = { 0 }; // empty string
48 static const UChar ANY
[] = { 65, 110, 121, 0 }; // Any
51 * Resource bundle key for the RuleBasedTransliterator rule.
53 //static const char RB_RULE[] = "Rule";
57 //------------------------------------------------------------------
59 //------------------------------------------------------------------
61 TransliteratorAlias::TransliteratorAlias(const UnicodeString
& theAliasID
,
62 const UnicodeSet
* cpdFilter
) :
64 aliasesOrRules(theAliasID
),
66 compoundFilter(cpdFilter
),
67 direction(UTRANS_FORWARD
),
68 type(TransliteratorAlias::SIMPLE
) {
71 TransliteratorAlias::TransliteratorAlias(const UnicodeString
& theID
,
72 const UnicodeString
& idBlocks
,
73 UVector
* adoptedTransliterators
,
74 const UnicodeSet
* cpdFilter
) :
76 aliasesOrRules(idBlocks
),
77 transes(adoptedTransliterators
),
78 compoundFilter(cpdFilter
),
79 direction(UTRANS_FORWARD
),
80 type(TransliteratorAlias::COMPOUND
) {
83 TransliteratorAlias::TransliteratorAlias(const UnicodeString
& theID
,
84 const UnicodeString
& rules
,
85 UTransDirection dir
) :
87 aliasesOrRules(rules
),
91 type(TransliteratorAlias::RULES
) {
// Destructor.
// NOTE(review): the body is not visible in this view. The COMPOUND constructor
// takes a parameter named adoptedTransliterators, so presumably the vector is
// released here -- confirm, and confirm what happens to any transliterators
// still inside it if create() was never called.
94 TransliteratorAlias::~TransliteratorAlias() {
99 Transliterator
* TransliteratorAlias::create(UParseError
& pe
,
104 Transliterator
*t
= NULL
;
107 t
= Transliterator::createInstance(aliasesOrRules
, UTRANS_FORWARD
, pe
, ec
);
111 if (compoundFilter
!= 0)
112 t
->adoptFilter((UnicodeSet
*)compoundFilter
->clone());
116 // the total number of transliterators in the compound is the total number of anonymous transliterators
117 // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
118 // block and that each pair anonymous transliterators has an ID block between them. Then we go back
119 // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
120 // marks the position where an anonymous transliterator goes) and adjust accordingly
121 int32_t anonymousRBTs
= transes
->size();
122 int32_t transCount
= anonymousRBTs
* 2 + 1;
123 if (!aliasesOrRules
.isEmpty() && aliasesOrRules
[0] == (UChar
)(0xffff))
125 if (aliasesOrRules
.length() >= 2 && aliasesOrRules
[aliasesOrRules
.length() - 1] == (UChar
)(0xffff))
127 UnicodeString
noIDBlock((UChar
)(0xffff));
128 noIDBlock
+= ((UChar
)(0xffff));
129 int32_t pos
= aliasesOrRules
.indexOf(noIDBlock
);
132 pos
= aliasesOrRules
.indexOf(noIDBlock
, pos
+ 1);
135 UVector
transliterators(ec
);
136 UnicodeString idBlock
;
137 int32_t blockSeparatorPos
= aliasesOrRules
.indexOf((UChar
)(0xffff));
138 while (blockSeparatorPos
>= 0) {
139 aliasesOrRules
.extract(0, blockSeparatorPos
, idBlock
);
140 aliasesOrRules
.remove(0, blockSeparatorPos
+ 1);
141 if (!idBlock
.isEmpty())
142 transliterators
.addElement(Transliterator::createInstance(idBlock
, UTRANS_FORWARD
, pe
, ec
), ec
);
143 if (!transes
->isEmpty())
144 transliterators
.addElement(transes
->orphanElementAt(0), ec
);
145 blockSeparatorPos
= aliasesOrRules
.indexOf((UChar
)(0xffff));
147 if (!aliasesOrRules
.isEmpty())
148 transliterators
.addElement(Transliterator::createInstance(aliasesOrRules
, UTRANS_FORWARD
, pe
, ec
), ec
);
149 while (!transes
->isEmpty())
150 transliterators
.addElement(transes
->orphanElementAt(0), ec
);
153 t
= new CompoundTransliterator(ID
, transliterators
,
154 (compoundFilter
? (UnicodeSet
*)(compoundFilter
->clone()) : 0),
155 anonymousRBTs
, pe
, ec
);
157 ec
= U_MEMORY_ALLOCATION_ERROR
;
161 for (int32_t i
= 0; i
< transliterators
.size(); i
++)
162 delete (Transliterator
*)(transliterators
.elementAt(i
));
167 U_ASSERT(FALSE
); // don't call create() if isRuleBased() returns TRUE!
173 UBool
TransliteratorAlias::isRuleBased() const {
174 return type
== RULES
;
177 void TransliteratorAlias::parse(TransliteratorParser
& parser
,
178 UParseError
& pe
, UErrorCode
& ec
) const {
179 U_ASSERT(type
== RULES
);
184 parser
.parse(aliasesOrRules
, direction
, pe
, ec
);
187 //----------------------------------------------------------------------
189 //----------------------------------------------------------------------
192 * A Spec is a string specifying either a source or a target. In more
193 * general terms, it may also specify a variant, but we only use the
194 * Spec class for sources and targets.
196 * A Spec may be a locale or a script. If it is a locale, it has a
197 * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
198 * ssss is the script mapping of xx_YY_ZZZ. The Spec API methods
199 * hasFallback(), next(), and reset() iterate over this fallback
202 * The Spec class canonicalizes itself, so the locale is put into
203 * canonical form, or the script is transformed from an abbreviation
206 class Spec
: public UMemory
{
208 Spec(const UnicodeString
& spec
);
211 const UnicodeString
& get() const;
212 UBool
hasFallback() const;
213 const UnicodeString
& next();
216 UBool
isLocale() const;
217 ResourceBundle
& getBundle() const;
219 operator const UnicodeString
&() const { return get(); }
220 const UnicodeString
& getTop() const { return top
; }
227 UnicodeString nextSpec
;
228 UnicodeString scriptName
;
229 UBool isSpecLocale
; // TRUE if spec is a locale
230 UBool isNextLocale
; // TRUE if nextSpec is a locale
233 Spec(const Spec
&other
); // forbid copying of this class
234 Spec
&operator=(const Spec
&other
); // forbid copying of this class
237 Spec::Spec(const UnicodeString
& theSpec
)
241 UErrorCode status
= U_ZERO_ERROR
;
242 CharString
topch(theSpec
);
244 LocaleUtility::initLocaleFromName(theSpec
, topLoc
);
245 if (!topLoc
.isBogus()) {
246 res
= new ResourceBundle(U_ICUDATA_TRANSLIT
, topLoc
, status
);
251 if (U_FAILURE(status
) || status
== U_USING_DEFAULT_WARNING
) {
257 // Canonicalize script name -or- do locale->script mapping
258 status
= U_ZERO_ERROR
;
259 static const int32_t capacity
= 10;
260 UScriptCode script
[capacity
]={USCRIPT_INVALID_CODE
};
261 int32_t num
= uscript_getCode(topch
,script
,capacity
, &status
);
262 if (num
> 0 && script
[0] != USCRIPT_INVALID_CODE
) {
263 scriptName
= UnicodeString(uscript_getName(script
[0]), -1, US_INV
);
268 // Canonicalize locale name
269 UnicodeString locStr
;
270 LocaleUtility::initNameFromLocale(topLoc
, locStr
);
271 if (!locStr
.isBogus()) {
274 } else if (scriptName
.length() != 0) {
275 // We are a script; use canonical name
279 // assert(spec != top);
287 UBool
Spec::hasFallback() const {
288 return nextSpec
.length() != 0;
// Fragment; the enclosing function header is not visible in this view
// (NOTE(review): presumably Spec::reset() -- confirm).
// The current spec is treated as a locale exactly when a resource bundle
// pointer is held in `res`.
294 isSpecLocale
= (res
!= 0);
299 void Spec::setupNext() {
300 isNextLocale
= FALSE
;
303 int32_t i
= nextSpec
.lastIndexOf(LOCALE_SEP
);
304 // If i == 0 then we have _FOO, so we fall through
305 // to the scriptName.
307 nextSpec
.truncate(i
);
310 nextSpec
= scriptName
; // scriptName may be empty
313 // spec is a script, so we are at the end
314 nextSpec
.truncate(0);
319 // for(const UnicodeString& s(spec.get());
320 // spec.hasFallback(); s(spec.next())) { ...
322 const UnicodeString
& Spec::next() {
324 isSpecLocale
= isNextLocale
;
329 const UnicodeString
& Spec::get() const {
333 UBool
Spec::isLocale() const {
337 ResourceBundle
& Spec::getBundle() const {
341 //----------------------------------------------------------------------
345 // Vector of Entry pointers currently in use
346 static UVector
* DEBUG_entries
= NULL
;
348 static void DEBUG_setup() {
349 if (DEBUG_entries
== NULL
) {
350 UErrorCode ec
= U_ZERO_ERROR
;
351 DEBUG_entries
= new UVector(ec
);
355 // Caller must call DEBUG_setup first. Return index of given Entry,
356 // if it is in use (not deleted yet), or -1 if not found.
357 static int DEBUG_findEntry(Entry
* e
) {
358 for (int i
=0; i
<DEBUG_entries
->size(); ++i
) {
359 if (e
== (Entry
*) DEBUG_entries
->elementAt(i
)) {
366 // Track object creation
367 static void DEBUG_newEntry(Entry
* e
) {
369 if (DEBUG_findEntry(e
) >= 0) {
370 // This should really never happen unless the heap is broken
371 printf("ERROR DEBUG_newEntry duplicate new pointer %08X\n", e
);
374 UErrorCode ec
= U_ZERO_ERROR
;
375 DEBUG_entries
->addElement(e
, ec
);
378 // Track object deletion
379 static void DEBUG_delEntry(Entry
* e
) {
381 int i
= DEBUG_findEntry(e
);
383 printf("ERROR DEBUG_delEntry possible double deletion %08X\n", e
);
386 DEBUG_entries
->removeElementAt(i
);
389 // Track object usage
390 static void DEBUG_useEntry(Entry
* e
) {
391 if (e
== NULL
) return;
393 int i
= DEBUG_findEntry(e
);
395 printf("ERROR DEBUG_useEntry possible dangling pointer %08X\n", e
);
400 // If we're not debugging then make these macros into NOPs
401 #define DEBUG_newEntry(x)
402 #define DEBUG_delEntry(x)
403 #define DEBUG_useEntry(x)
406 //----------------------------------------------------------------------
408 //----------------------------------------------------------------------
411 * The Entry object stores objects of different types and
412 * singleton objects as placeholders for rule-based transliterators to
413 * be built as needed. Instances of this struct can be placeholders,
414 * can represent prototype transliterators to be cloned, or can
415 * represent TransliteratorData objects. We don't support storing
416 * classes in the registry because we don't have the rtti infrastructure
417 * for it. We could easily add this if there is a need for it in the
420 class Entry
: public UMemory
{
431 NONE
// Only used for uninitialized entries
433 // NOTE: stringArg cannot go inside the union because
434 // it has a copy constructor
435 UnicodeString stringArg
; // For RULES_*, ALIAS, COMPOUND_RBT
436 int32_t intArg
; // For COMPOUND_RBT, LOCALE_RULES
437 UnicodeSet
* compoundFilter
; // For COMPOUND_RBT
439 Transliterator
* prototype
; // For PROTOTYPE
440 TransliterationRuleData
* data
; // For RBT_DATA
441 UVector
* dataVector
; // For COMPOUND_RBT
443 Transliterator::Factory function
;
444 Transliterator::Token context
;
445 } factory
; // For FACTORY
449 void adoptPrototype(Transliterator
* adopted
);
450 void setFactory(Transliterator::Factory factory
,
451 Transliterator::Token context
);
455 Entry(const Entry
&other
); // forbid copying of this class
456 Entry
&operator=(const Entry
&other
); // forbid copying of this class
// Fragment; the enclosing function header is not visible in this view
// (NOTE(review): presumably the Entry constructor -- confirm).
// Starts with no compound filter, then reports the new object to the
// DEBUG_MEM tracker (a no-op macro when not debugging).
461 compoundFilter
= NULL
;
463 DEBUG_newEntry(this);
// Fragment; the enclosing function header is not visible in this view
// (NOTE(review): presumably the Entry destructor -- confirm).
// Reports the deletion to the DEBUG_MEM tracker, then releases whatever the
// entry holds, selected by entryType.
467 DEBUG_delEntry(this);
468 if (entryType
== PROTOTYPE
) {
470 } else if (entryType
== RBT_DATA
) {
471 // The data object is shared between instances of RBT. The
472 // entry object owns it. It should only be deleted when the
473 // transliterator component is being cleaned up. Doing so
474 // invalidates any RBTs that the user has instantiated.
476 } else if (entryType
== COMPOUND_RBT
) {
// Drain the vector, deleting each rule-data object as it is orphaned;
// a null dataVector is tolerated.
477 while (u
.dataVector
!= NULL
&& !u
.dataVector
->isEmpty())
478 delete (TransliterationRuleData
*)u
.dataVector
->orphanElementAt(0);
// The compound filter is deleted regardless of entryType.
481 delete compoundFilter
;
484 void Entry::adoptPrototype(Transliterator
* adopted
) {
485 if (entryType
== PROTOTYPE
) {
488 entryType
= PROTOTYPE
;
489 u
.prototype
= adopted
;
492 void Entry::setFactory(Transliterator::Factory factory
,
493 Transliterator::Token context
) {
494 if (entryType
== PROTOTYPE
) {
498 u
.factory
.function
= factory
;
499 u
.factory
.context
= context
;
502 // UObjectDeleter for Hashtable::setValueDeleter
504 static void U_CALLCONV
505 deleteEntry(void* obj
) {
510 //----------------------------------------------------------------------
511 // class TransliteratorRegistry: Basic public API
512 //----------------------------------------------------------------------
514 TransliteratorRegistry::TransliteratorRegistry(UErrorCode
& status
) :
515 registry(TRUE
, status
),
516 specDAG(TRUE
, status
),
519 registry
.setValueDeleter(deleteEntry
);
520 availableIDs
.setDeleter(uhash_deleteUnicodeString
);
521 availableIDs
.setComparer(uhash_compareCaselessUnicodeString
);
522 specDAG
.setValueDeleter(uhash_deleteHashtable
);
525 TransliteratorRegistry::~TransliteratorRegistry() {
526 // Through the magic of C++, everything cleans itself up
529 Transliterator
* TransliteratorRegistry::get(const UnicodeString
& ID
,
530 TransliteratorAlias
*& aliasReturn
,
531 UErrorCode
& status
) {
532 U_ASSERT(aliasReturn
== NULL
);
533 Entry
*entry
= find(ID
);
534 return (entry
== 0) ? 0
535 : instantiateEntry(ID
, entry
, aliasReturn
, status
);
538 Transliterator
* TransliteratorRegistry::reget(const UnicodeString
& ID
,
539 TransliteratorParser
& parser
,
540 TransliteratorAlias
*& aliasReturn
,
541 UErrorCode
& status
) {
542 U_ASSERT(aliasReturn
== NULL
);
543 Entry
*entry
= find(ID
);
546 // We get to this point if there are two threads, one of which
547 // is instantiating an ID, and another of which is removing
548 // the same ID from the registry, and the timing is just right.
552 // The usage model for the caller is that they will first call
553 // reg->get() inside the mutex, they'll get back an alias, they call
554 // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
555 // outside the mutex, then reg->reget() inside the mutex again. A real
556 // mess, but it gets things working for ICU 3.0. [alan].
558 // Note: It's possible that in between the caller calling
559 // alias->parse() and reg->reget(), that another thread will have
560 // called reg->reget(), and the entry will already have been fixed up.
561 // We have to detect this so we don't stomp over existing entry
562 // data members and potentially leak memory (u.data and compoundFilter).
564 if (entry
->entryType
== Entry::RULES_FORWARD
||
565 entry
->entryType
== Entry::RULES_REVERSE
||
566 entry
->entryType
== Entry::LOCALE_RULES
) {
568 if (parser
.idBlockVector
.isEmpty() && parser
.dataVector
.isEmpty()) {
570 entry
->entryType
= Entry::ALIAS
;
571 entry
->stringArg
= UNICODE_STRING_SIMPLE("Any-NULL");
573 else if (parser
.idBlockVector
.isEmpty() && parser
.dataVector
.size() == 1) {
574 entry
->u
.data
= (TransliterationRuleData
*)parser
.dataVector
.orphanElementAt(0);
575 entry
->entryType
= Entry::RBT_DATA
;
577 else if (parser
.idBlockVector
.size() == 1 && parser
.dataVector
.isEmpty()) {
578 entry
->stringArg
= *(UnicodeString
*)(parser
.idBlockVector
.elementAt(0));
579 entry
->compoundFilter
= parser
.orphanCompoundFilter();
580 entry
->entryType
= Entry::ALIAS
;
583 entry
->entryType
= Entry::COMPOUND_RBT
;
584 entry
->compoundFilter
= parser
.orphanCompoundFilter();
585 entry
->u
.dataVector
= new UVector(status
);
586 entry
->stringArg
.remove();
588 int32_t limit
= parser
.idBlockVector
.size();
589 if (parser
.dataVector
.size() > limit
)
590 limit
= parser
.dataVector
.size();
592 for (int32_t i
= 0; i
< limit
; i
++) {
593 if (i
< parser
.idBlockVector
.size()) {
594 UnicodeString
* idBlock
= (UnicodeString
*)parser
.idBlockVector
.elementAt(i
);
595 if (!idBlock
->isEmpty())
596 entry
->stringArg
+= *idBlock
;
598 if (!parser
.dataVector
.isEmpty()) {
599 TransliterationRuleData
* data
= (TransliterationRuleData
*)parser
.dataVector
.orphanElementAt(0);
600 entry
->u
.dataVector
->addElement(data
, status
);
601 entry
->stringArg
+= (UChar
)0xffff; // use U+FFFF to mark position of RBTs in ID block
608 instantiateEntry(ID
, entry
, aliasReturn
, status
);
612 void TransliteratorRegistry::put(Transliterator
* adoptedProto
,
614 Entry
*entry
= new Entry();
615 entry
->adoptPrototype(adoptedProto
);
616 registerEntry(adoptedProto
->getID(), entry
, visible
);
619 void TransliteratorRegistry::put(const UnicodeString
& ID
,
620 Transliterator::Factory factory
,
621 Transliterator::Token context
,
623 Entry
*entry
= new Entry();
624 entry
->setFactory(factory
, context
);
625 registerEntry(ID
, entry
, visible
);
628 void TransliteratorRegistry::put(const UnicodeString
& ID
,
629 const UnicodeString
& resourceName
,
631 UBool readonlyResourceAlias
,
633 Entry
*entry
= new Entry();
634 entry
->entryType
= (dir
== UTRANS_FORWARD
) ? Entry::RULES_FORWARD
635 : Entry::RULES_REVERSE
;
636 if (readonlyResourceAlias
) {
637 entry
->stringArg
.setTo(TRUE
, resourceName
.getBuffer(), -1);
640 entry
->stringArg
= resourceName
;
642 registerEntry(ID
, entry
, visible
);
645 void TransliteratorRegistry::put(const UnicodeString
& ID
,
646 const UnicodeString
& alias
,
647 UBool readonlyAliasAlias
,
649 Entry
*entry
= new Entry();
650 entry
->entryType
= Entry::ALIAS
;
651 if (readonlyAliasAlias
) {
652 entry
->stringArg
.setTo(TRUE
, alias
.getBuffer(), -1);
655 entry
->stringArg
= alias
;
657 registerEntry(ID
, entry
, visible
);
660 void TransliteratorRegistry::remove(const UnicodeString
& ID
) {
661 UnicodeString source
, target
, variant
;
663 TransliteratorIDParser::IDtoSTV(ID
, source
, target
, variant
, sawSource
);
664 // Only need to do this if ID.indexOf('-') < 0
666 TransliteratorIDParser::STVtoID(source
, target
, variant
, id
);
668 removeSTV(source
, target
, variant
);
669 availableIDs
.removeElement((void*) &id
);
672 //----------------------------------------------------------------------
673 // class TransliteratorRegistry: Public ID and spec management
674 //----------------------------------------------------------------------
677 * == OBSOLETE - remove in ICU 3.4 ==
678 * Return the number of IDs currently registered with the system.
679 * To retrieve the actual IDs, call getAvailableID(i) with
680 * i from 0 to countAvailableIDs() - 1.
682 int32_t TransliteratorRegistry::countAvailableIDs(void) const {
683 return availableIDs
.size();
687 * == OBSOLETE - remove in ICU 3.4 ==
688 * Return the index-th available ID. index must be between 0
689 * and countAvailableIDs() - 1, inclusive. If index is out of
690 * range, the result of getAvailableID(0) is returned.
692 const UnicodeString
& TransliteratorRegistry::getAvailableID(int32_t index
) const {
693 if (index
< 0 || index
>= availableIDs
.size()) {
696 return *(const UnicodeString
*) availableIDs
[index
];
699 StringEnumeration
* TransliteratorRegistry::getAvailableIDs() const {
700 return new Enumeration(*this);
703 int32_t TransliteratorRegistry::countAvailableSources(void) const {
704 return specDAG
.count();
707 UnicodeString
& TransliteratorRegistry::getAvailableSource(int32_t index
,
708 UnicodeString
& result
) const {
710 const UHashElement
*e
= 0;
711 while (index
-- >= 0) {
712 e
= specDAG
.nextElement(pos
);
720 result
= *(UnicodeString
*) e
->key
.pointer
;
725 int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString
& source
) const {
726 Hashtable
*targets
= (Hashtable
*) specDAG
.get(source
);
727 return (targets
== 0) ? 0 : targets
->count();
730 UnicodeString
& TransliteratorRegistry::getAvailableTarget(int32_t index
,
731 const UnicodeString
& source
,
732 UnicodeString
& result
) const {
733 Hashtable
*targets
= (Hashtable
*) specDAG
.get(source
);
735 result
.truncate(0); // invalid source
739 const UHashElement
*e
= 0;
740 while (index
-- >= 0) {
741 e
= targets
->nextElement(pos
);
747 result
.truncate(0); // invalid index
749 result
= *(UnicodeString
*) e
->key
.pointer
;
754 int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString
& source
,
755 const UnicodeString
& target
) const {
756 Hashtable
*targets
= (Hashtable
*) specDAG
.get(source
);
760 UVector
*variants
= (UVector
*) targets
->get(target
);
761 // variants may be 0 if the source/target are invalid
762 return (variants
== 0) ? 0 : variants
->size();
765 UnicodeString
& TransliteratorRegistry::getAvailableVariant(int32_t index
,
766 const UnicodeString
& source
,
767 const UnicodeString
& target
,
768 UnicodeString
& result
) const {
769 Hashtable
*targets
= (Hashtable
*) specDAG
.get(source
);
771 result
.truncate(0); // invalid source
774 UVector
*variants
= (UVector
*) targets
->get(target
);
776 result
.truncate(0); // invalid target
779 UnicodeString
*v
= (UnicodeString
*) variants
->elementAt(index
);
781 result
.truncate(0); // invalid index
788 //----------------------------------------------------------------------
789 // class TransliteratorRegistry::Enumeration
790 //----------------------------------------------------------------------
792 TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry
& _reg
) :
793 index(0), reg(_reg
) {
796 TransliteratorRegistry::Enumeration::~Enumeration() {
799 int32_t TransliteratorRegistry::Enumeration::count(UErrorCode
& /*status*/) const {
800 return reg
.availableIDs
.size();
803 const UnicodeString
* TransliteratorRegistry::Enumeration::snext(UErrorCode
& status
) {
804 // This is sloppy but safe -- if we get out of sync with the underlying
805 // registry, we will still return legal strings, but they might not
806 // correspond to the snapshot at construction time. So there could be
807 // duplicate IDs or omitted IDs if insertions or deletions occur in one
808 // thread while another is iterating. To be more rigorous, add a timestamp,
809 // which is incremented with any modification, and validate this iterator
810 // against the timestamp at construction time. This probably isn't worth
811 // doing as long as there is some possibility of removing this code in favor
812 // of some new code based on Doug's service framework.
813 if (U_FAILURE(status
)) {
816 int32_t n
= reg
.availableIDs
.size();
818 status
= U_ENUM_OUT_OF_SYNC_ERROR
;
820 // index == n is okay -- this means we've reached the end
822 // Copy the string! This avoids lifetime problems.
823 unistr
= *(const UnicodeString
*)reg
.availableIDs
[index
++];
830 void TransliteratorRegistry::Enumeration::reset(UErrorCode
& /*status*/) {
834 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration
)
836 //----------------------------------------------------------------------
837 // class TransliteratorRegistry: internal
838 //----------------------------------------------------------------------
841 * Convenience method. Calls 6-arg registerEntry().
843 void TransliteratorRegistry::registerEntry(const UnicodeString
& source
,
844 const UnicodeString
& target
,
845 const UnicodeString
& variant
,
849 UnicodeString
s(source
);
850 if (s
.length() == 0) {
853 TransliteratorIDParser::STVtoID(source
, target
, variant
, ID
);
854 registerEntry(ID
, s
, target
, variant
, adopted
, visible
);
858 * Convenience method. Calls 6-arg registerEntry().
860 void TransliteratorRegistry::registerEntry(const UnicodeString
& ID
,
863 UnicodeString source
, target
, variant
;
865 TransliteratorIDParser::IDtoSTV(ID
, source
, target
, variant
, sawSource
);
866 // Only need to do this if ID.indexOf('-') < 0
868 TransliteratorIDParser::STVtoID(source
, target
, variant
, id
);
869 registerEntry(id
, source
, target
, variant
, adopted
, visible
);
873 * Register an entry object (adopted) with the given ID, source,
874 * target, and variant strings.
876 void TransliteratorRegistry::registerEntry(const UnicodeString
& ID
,
877 const UnicodeString
& source
,
878 const UnicodeString
& target
,
879 const UnicodeString
& variant
,
882 UErrorCode status
= U_ZERO_ERROR
;
883 registry
.put(ID
, adopted
, status
);
885 registerSTV(source
, target
, variant
);
886 if (!availableIDs
.contains((void*) &ID
)) {
887 UnicodeString
*newID
= (UnicodeString
*)ID
.clone();
888 // NUL-terminate the ID string
889 newID
->getTerminatedBuffer();
890 availableIDs
.addElement(newID
, status
);
893 removeSTV(source
, target
, variant
);
894 availableIDs
.removeElement((void*) &ID
);
899 * Register a source-target/variant in the specDAG. Variant may be
900 * empty, but source and target must not be. If variant is empty then
901 * the special variant NO_VARIANT is stored in slot zero of the
902 * UVector of variants.
904 void TransliteratorRegistry::registerSTV(const UnicodeString
& source
,
905 const UnicodeString
& target
,
906 const UnicodeString
& variant
) {
907 // assert(source.length() > 0);
908 // assert(target.length() > 0);
909 UErrorCode status
= U_ZERO_ERROR
;
910 Hashtable
*targets
= (Hashtable
*) specDAG
.get(source
);
912 targets
= new Hashtable(TRUE
, status
);
913 if (U_FAILURE(status
) || targets
== 0) {
916 targets
->setValueDeleter(uhash_deleteUVector
);
917 specDAG
.put(source
, targets
, status
);
919 UVector
*variants
= (UVector
*) targets
->get(target
);
921 variants
= new UVector(uhash_deleteUnicodeString
,
922 uhash_compareCaselessUnicodeString
, status
);
926 targets
->put(target
, variants
, status
);
928 // assert(NO_VARIANT == "");
929 // We add the variant string. If it is the special "no variant"
930 // string, that is, the empty string, we add it at position zero.
931 if (!variants
->contains((void*) &variant
)) {
932 if (variant
.length() > 0) {
933 variants
->addElement(new UnicodeString(variant
), status
);
935 variants
->insertElementAt(new UnicodeString(NO_VARIANT
), 0, status
);
941 * Remove a source-target/variant from the specDAG.
943 void TransliteratorRegistry::removeSTV(const UnicodeString
& source
,
944 const UnicodeString
& target
,
945 const UnicodeString
& variant
) {
946 // assert(source.length() > 0);
947 // assert(target.length() > 0);
948 // UErrorCode status = U_ZERO_ERROR;
949 Hashtable
*targets
= (Hashtable
*) specDAG
.get(source
);
951 return; // should never happen for valid s-t/v
953 UVector
*variants
= (UVector
*) targets
->get(target
);
955 return; // should never happen for valid s-t/v
957 variants
->removeElement((void*) &variant
);
958 if (variants
->size() == 0) {
959 targets
->remove(target
); // should delete variants
960 if (targets
->count() == 0) {
961 specDAG
.remove(source
); // should delete targets
967 * Attempt to find a source-target/variant in the dynamic registry
968 * store. Return 0 on failure.
970 * Caller does NOT own returned object.
972 Entry
* TransliteratorRegistry::findInDynamicStore(const Spec
& src
,
974 const UnicodeString
& variant
) const {
976 TransliteratorIDParser::STVtoID(src
, trg
, variant
, ID
);
977 Entry
*e
= (Entry
*) registry
.get(ID
);
983 * Attempt to find a source-target/variant in the static locale
984 * resource store. Do not perform fallback. Return 0 on failure.
986 * On success, create a new entry object, register it in the dynamic
987 * store, and return a pointer to it, but do not make it public --
988 * just because someone requested something, we do not expand the
989 * available ID list (or spec DAG).
991 * Caller does NOT own returned object.
993 Entry
* TransliteratorRegistry::findInStaticStore(const Spec
& src
,
995 const UnicodeString
& variant
) {
997 if (src
.isLocale()) {
998 entry
= findInBundle(src
, trg
, variant
, UTRANS_FORWARD
);
999 } else if (trg
.isLocale()) {
1000 entry
= findInBundle(trg
, src
, variant
, UTRANS_REVERSE
);
1003 // If we found an entry, store it in the Hashtable for next
1006 registerEntry(src
.getTop(), trg
.getTop(), variant
, entry
, FALSE
);
1012 // As of 2.0, resource bundle keys cannot contain '_'
1013 static const UChar TRANSLITERATE_TO
[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo"
1015 static const UChar TRANSLITERATE_FROM
[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom"
1017 static const UChar TRANSLITERATE
[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate"
1020 * Attempt to find an entry in a single resource bundle. This is
1021 * a one-sided lookup. findInStaticStore() performs up to two such
1022 * lookups, one for the source, and one for the target.
1024 * Do not perform fallback. Return 0 on failure.
1026 * On success, create a new Entry object, populate it, and return it.
1027 * The caller owns the returned object.
1029 Entry
* TransliteratorRegistry::findInBundle(const Spec
& specToOpen
,
1030 const Spec
& specToFind
,
1031 const UnicodeString
& variant
,
1032 UTransDirection direction
)
1035 UnicodeString resStr
;
1038 for (pass
=0; pass
<2; ++pass
) {
1040 // First try either TransliteratorTo_xxx or
1041 // TransliterateFrom_xxx, then try the bidirectional
1042 // Transliterate_xxx. This precedence order is arbitrary
1043 // but must be consistent and documented.
1045 utag
.append(direction
== UTRANS_FORWARD
?
1046 TRANSLITERATE_TO
: TRANSLITERATE_FROM
);
1048 utag
.append(TRANSLITERATE
);
1050 UnicodeString
s(specToFind
.get());
1051 utag
.append(s
.toUpper(""));
1052 CharString
tag(utag
);
1054 UErrorCode status
= U_ZERO_ERROR
;
1055 ResourceBundle
subres(specToOpen
.getBundle().get(tag
, status
));
1056 if (U_FAILURE(status
) || status
== U_USING_DEFAULT_WARNING
) {
1061 if (specToOpen
.get() != LocaleUtility::initNameFromLocale(subres
.getLocale(), s
)) {
1065 if (variant
.length() != 0) {
1066 CharString
var(variant
);
1067 status
= U_ZERO_ERROR
;
1068 resStr
= subres
.getStringEx(var
, status
);
1069 if (U_SUCCESS(status
)) {
1070 // Exit loop successfully
1076 // Variant is empty, which means match the first variant listed.
1077 status
= U_ZERO_ERROR
;
1078 resStr
= subres
.getStringEx(1, status
);
1079 if (U_SUCCESS(status
)) {
1080 // Exit loop successfully
1091 // We have succeeded in loading a string from the locale
1092 // resources. Create a new registry entry to hold it and return it.
1093 Entry
*entry
= new Entry();
1095 // The direction is always forward for the
1096 // TransliterateTo_xxx and TransliterateFrom_xxx
1097 // items; those are unidirectional forward rules.
1098 // For the bidirectional Transliterate_xxx items,
1099 // the direction is the value passed in to this
1101 int32_t dir
= (pass
== 0) ? UTRANS_FORWARD
: direction
;
1102 entry
->entryType
= Entry::LOCALE_RULES
;
1103 entry
->stringArg
= resStr
;
1104 entry
->intArg
= dir
;
1111 * Convenience method. Calls 3-arg find().
1113 Entry
* TransliteratorRegistry::find(const UnicodeString
& ID
) {
1114 UnicodeString source
, target
, variant
;
1116 TransliteratorIDParser::IDtoSTV(ID
, source
, target
, variant
, sawSource
);
1117 return find(source
, target
, variant
);
1121 * Top-level find method. Attempt to find a source-target/variant in
1122 * either the dynamic or the static (locale resource) store. Perform
1125 * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
1127 * ss_SS_SSS-tt_TT_TTT/v -- in hashtable
1128 * ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
1130 * repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
1137 * Here * matches the first variant listed.
1139 * Caller does NOT own returned object. Return 0 on failure.
1141 Entry
* TransliteratorRegistry::find(UnicodeString
& source
,
1142 UnicodeString
& target
,
1143 UnicodeString
& variant
) {
1149 if (variant
.length() != 0) {
1151 // Seek exact match in hashtable
1152 entry
= findInDynamicStore(src
, trg
, variant
);
1157 // Seek exact match in locale resources
1158 entry
= findInStaticStore(src
, trg
, variant
);
1167 // Seek match in hashtable
1168 entry
= findInDynamicStore(src
, trg
, NO_VARIANT
);
1173 // Seek match in locale resources
1174 entry
= findInStaticStore(src
, trg
, NO_VARIANT
);
1178 if (!src
.hasFallback()) {
1183 if (!trg
.hasFallback()) {
1193 * Given an Entry object, instantiate it. Caller owns result. Return
1196 * Return a non-empty aliasReturn value if the ID points to an alias.
1197 * We cannot instantiate it ourselves because the alias may contain
1198 * filters or compounds, which we do not understand. Caller should
1199 * make aliasReturn empty before calling.
1201 * The entry object is assumed to reside in the dynamic store. It may be
1204 Transliterator
* TransliteratorRegistry::instantiateEntry(const UnicodeString
& ID
,
1206 TransliteratorAlias
* &aliasReturn
,
1207 UErrorCode
& status
) {
1208 Transliterator
*t
= 0;
1209 U_ASSERT(aliasReturn
== 0);
1211 switch (entry
->entryType
) {
1212 case Entry::RBT_DATA
:
1213 t
= new RuleBasedTransliterator(ID
, entry
->u
.data
);
1215 status
= U_MEMORY_ALLOCATION_ERROR
;
1218 case Entry::PROTOTYPE
:
1219 t
= entry
->u
.prototype
->clone();
1221 status
= U_MEMORY_ALLOCATION_ERROR
;
1225 aliasReturn
= new TransliteratorAlias(entry
->stringArg
, entry
->compoundFilter
);
1226 if (aliasReturn
== 0) {
1227 status
= U_MEMORY_ALLOCATION_ERROR
;
1230 case Entry::FACTORY
:
1231 t
= entry
->u
.factory
.function(ID
, entry
->u
.factory
.context
);
1233 status
= U_MEMORY_ALLOCATION_ERROR
;
1236 case Entry::COMPOUND_RBT
:
1238 UVector
* rbts
= new UVector(status
);
1239 int32_t passNumber
= 1;
1240 for (int32_t i
= 0; U_SUCCESS(status
) && i
< entry
->u
.dataVector
->size(); i
++) {
1241 Transliterator
* t
= new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING
) + (passNumber
++),
1242 (TransliterationRuleData
*)(entry
->u
.dataVector
->elementAt(i
)), FALSE
);
1244 status
= U_MEMORY_ALLOCATION_ERROR
;
1246 rbts
->addElement(t
, status
);
1248 if (U_FAILURE(status
))
1250 aliasReturn
= new TransliteratorAlias(ID
, entry
->stringArg
, rbts
, entry
->compoundFilter
);
1252 if (aliasReturn
== 0) {
1253 status
= U_MEMORY_ALLOCATION_ERROR
;
1256 case Entry::LOCALE_RULES
:
1257 aliasReturn
= new TransliteratorAlias(ID
, entry
->stringArg
,
1258 (UTransDirection
) entry
->intArg
);
1259 if (aliasReturn
== 0) {
1260 status
= U_MEMORY_ALLOCATION_ERROR
;
1263 case Entry::RULES_FORWARD
:
1264 case Entry::RULES_REVERSE
:
1265 // Process the rule data into a TransliteratorRuleData object,
1266 // and possibly also into an ::id header and/or footer. Then
1267 // we modify the registry with the parsed data and retry.
1269 TransliteratorParser
parser(status
);
1271 // We use the file name, taken from another resource bundle
1272 // 2-d array at static init time, as a locale language. We're
1273 // just using the locale mechanism to map through to a file
1274 // name; this in no way represents an actual locale.
1275 //CharString ch(entry->stringArg);
1276 //UResourceBundle *bundle = ures_openDirect(0, ch, &status);
1277 UnicodeString rules
= entry
->stringArg
;
1278 //ures_close(bundle);
1280 //if (U_FAILURE(status)) {
1281 // We have a failure of some kind. Remove the ID from the
1282 // registry so we don't keep trying. NOTE: This will throw off
1283 // anyone who is, at the moment, trying to iterate over the
1284 // available IDs. That's acceptable since we should never
1285 // really get here except under installation, configuration,
1286 // or unrecoverable run time memory failures.
1290 // If the status indicates a failure, then we don't have any
1291 // rules -- there is probably an installation error. The list
1292 // in the root locale should correspond to all the installed
1293 // transliterators; if it lists something that's not
1294 // installed, we'll get an error from ResourceBundle.
1295 aliasReturn
= new TransliteratorAlias(ID
, rules
,
1296 ((entry
->entryType
== Entry::RULES_REVERSE
) ?
1297 UTRANS_REVERSE
: UTRANS_FORWARD
));
1298 if (aliasReturn
== 0) {
1299 status
= U_MEMORY_ALLOCATION_ERROR
;
1305 U_ASSERT(FALSE
); // can't get here
1311 #endif /* #if !UCONFIG_NO_TRANSLITERATION */