2 **********************************************************************
3 * Copyright (c) 2001-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 08/10/2001 aliu Creation.
8 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
17 #include "unicode/uobject.h"
18 #include "unicode/translit.h"
28 //------------------------------------------------------------------
29 // TransliteratorAlias
30 //------------------------------------------------------------------
33 * A TransliteratorAlias object is returned by get() if the given ID
34 * actually translates into something else. The caller then invokes
35 * the create() method on the alias to create the actual
36 * transliterator, and deletes the alias.
38 * Why all the shenanigans? To prevent circular calls between
39 * the registry code and the transliterator code that would deadlock.
41 class TransliteratorAlias
: public UMemory
{
44 * Construct a simple alias (type == SIMPLE)
45 * @param aliasID the given id.
47 TransliteratorAlias(const UnicodeString
& aliasID
, const UnicodeSet
* compoundFilter
);
50 * Construct a compound RBT alias (type == COMPOUND)
52 TransliteratorAlias(const UnicodeString
& ID
, const UnicodeString
& idBlocks
,
53 UVector
* adoptedTransliterators
,
54 const UnicodeSet
* compoundFilter
);
57 * Construct a rules alias (type = RULES)
// NOTE(review): this declaration is cut off in this view right after the
// `rules` parameter; any remaining parameters and the closing `);` are not
// visible here. Confirm against the complete header before relying on it.
59 TransliteratorAlias(const UnicodeString
& theID
,
60 const UnicodeString
& rules
,
63 ~TransliteratorAlias();
66 * The whole point of create() is that the caller must invoke
67 * it when the registry mutex is NOT held, to prevent deadlock.
68 * It may only be called once.
70 * Note: Only call create() if isRuleBased() returns FALSE.
72 * This method must be called *outside* of the TransliteratorRegistry
75 Transliterator
* create(UParseError
&, UErrorCode
&);
78 * Return TRUE if this alias is rule-based. If so, the caller
79 * must call parse() on it, then call TransliteratorRegistry::reget().
81 UBool
isRuleBased() const;
84 * If isRuleBased() returns TRUE, then the caller must call this
85 * method, followed by TransliteratorRegistry::reget(). The latter
86 * method must be called inside the TransliteratorRegistry mutex.
88 * Note: Only call parse() if isRuleBased() returns TRUE.
90 * This method must be called *outside* of the TransliteratorRegistry
91 * mutex, because it can instantiate Transliterators embedded in
92 * the rules via the "&Latin-Arabic()" syntax.
94 void parse(TransliteratorParser
& parser
,
95 UParseError
& pe
, UErrorCode
& ec
) const;
98 // We actually come in three flavors:
100 // Here aliasID is the alias string. Everything else is
101 // null, zero, empty.
103 // Here ID is the ID, aliasID is the idBlock, trans is the
104 // contained RBT, and idSplitPoint is the offet in aliasID
105 // where the contained RBT goes. compoundFilter is the
106 // compound filter, and it is _not_ owned.
108 // Here ID is the ID, aliasID is the rules string.
109 // idSplitPoint is the UTransDirection.
111 UnicodeString aliasesOrRules
;
112 UVector
* transes
; // owned
113 const UnicodeSet
* compoundFilter
; // alias
114 UTransDirection direction
;
115 enum { SIMPLE
, COMPOUND
, RULES
} type
;
117 TransliteratorAlias(const TransliteratorAlias
&other
); // forbid copying of this class
118 TransliteratorAlias
&operator=(const TransliteratorAlias
&other
); // forbid copying of this class
123 * A registry of system transliterators. This is the data structure
124 * that implements the mapping between transliterator IDs and the data
125 * or function pointers used to create the corresponding
126 * transliterators. There is one instance of the registry that is
127 * created statically.
129 * The registry consists of a dynamic component -- a hashtable -- and
130 * a static component -- locale resource bundles. The dynamic store
131 * is semantically overlaid on the static store, so the static mapping
132 * can be dynamically overridden.
134 * This is an internal class that is only used by Transliterator.
135 * Transliterator maintains one static instance of this class and
136 * delegates all registry-related operations to it.
140 class TransliteratorRegistry
: public UMemory
{
146 * @param status Output param set to success/failure code.
148 TransliteratorRegistry(UErrorCode
& status
);
151 * Nonvirtual destructor -- this class is not subclassable.
153 ~TransliteratorRegistry();
155 //------------------------------------------------------------------
157 //------------------------------------------------------------------
160 * Given a simple ID (forward direction, no inline filter, not
161 * compound) attempt to instantiate it from the registry. Return
164 * Return a non-NULL aliasReturn value if the ID points to an alias.
165 * We cannot instantiate it ourselves because the alias may contain
166 * filters or compounds, which we do not understand. Caller should
167 * make aliasReturn NULL before calling.
168 * @param ID the given ID
169 * @param aliasReturn output param to receive TransliteratorAlias;
170 * should be NULL on entry
171 * @param parseError Struct to receive information on position
172 * of error if an error is encountered
173 * @param status Output param set to success/failure code.
// NOTE(review): this declaration is cut off in this view after the
// `aliasReturn` parameter. The preceding comment documents `parseError`
// and `status` parameters, so presumably those follow -- confirm against
// the complete header.
175 Transliterator
* get(const UnicodeString
& ID
,
176 TransliteratorAlias
*& aliasReturn
,
180 * The caller must call this after calling get(), if [a] calling get()
181 * returns an alias, and [b] the alias is rule based. In that
182 * situation the caller must call alias->parse() to do the parsing
183 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
184 * instantiating the transliterator.
186 * Note: Another alias might be returned by this method.
188 * This method (like all public methods of this class) must be called
189 * from within the TransliteratorRegistry mutex.
191 * @param aliasReturn output param to receive TransliteratorAlias;
192 * should be NULL on entry
// NOTE(review): this declaration is cut off in this view after the
// `aliasReturn` parameter; any remaining parameters and the closing `);`
// are not visible here.
194 Transliterator
* reget(const UnicodeString
& ID
,
195 TransliteratorParser
& parser
,
196 TransliteratorAlias
*& aliasReturn
,
200 * Register a prototype (adopted). This adds an entry to the
201 * dynamic store, or replaces an existing entry. Any entry in the
202 * underlying static locale resource store is masked.
// NOTE(review): prototype-registering overload; the declaration is cut off
// in this view after `adoptedProto`, so the rest of the parameter list is
// not visible here.
204 void put(Transliterator
* adoptedProto
,
208 * Register an ID and a factory function pointer. This adds an
209 * entry to the dynamic store, or replaces an existing entry. Any
210 * entry in the underlying static locale resource store is masked.
// NOTE(review): factory-registering overload; the declaration is cut off
// in this view after `context`, so the rest of the parameter list is not
// visible here.
212 void put(const UnicodeString
& ID
,
213 Transliterator::Factory factory
,
214 Transliterator::Token context
,
218 * Register an ID and a resource name. This adds an entry to the
219 * dynamic store, or replaces an existing entry. Any entry in the
220 * underlying static locale resource store is masked.
// NOTE(review): resource-name-registering overload; the embedded numbering
// skips a line between `resourceName` and `readonlyResourceAlias`, and the
// declaration is cut off after `readonlyResourceAlias`. Parameters may be
// missing from this view -- confirm against the complete header.
222 void put(const UnicodeString
& ID
,
223 const UnicodeString
& resourceName
,
225 UBool readonlyResourceAlias
,
229 * Register an ID and an alias ID. This adds an entry to the
230 * dynamic store, or replaces an existing entry. Any entry in the
231 * underlying static locale resource store is masked.
// NOTE(review): alias-registering overload; the declaration is cut off in
// this view after `readonlyAliasAlias`, so the rest of the parameter list
// is not visible here.
233 void put(const UnicodeString
& ID
,
234 const UnicodeString
& alias
,
235 UBool readonlyAliasAlias
,
239 * Unregister an ID. This removes an entry from the dynamic store
240 * if there is one. The static locale resource store is
242 * @param ID the given ID.
244 void remove(const UnicodeString
& ID
);
246 //------------------------------------------------------------------
247 // Public ID and spec management
248 //------------------------------------------------------------------
251 * Return a StringEnumeration over the IDs currently registered
255 StringEnumeration
* getAvailableIDs() const;
258 * == OBSOLETE - remove in ICU 3.4 ==
259 * Return the number of IDs currently registered with the system.
260 * To retrieve the actual IDs, call getAvailableID(i) with
261 * i from 0 to countAvailableIDs() - 1.
262 * @return the number of IDs currently registered with the system.
265 int32_t countAvailableIDs(void) const;
268 * == OBSOLETE - remove in ICU 3.4 ==
269 * Return the index-th available ID. index must be between 0
270 * and countAvailableIDs() - 1, inclusive. If index is out of
271 * range, the result of getAvailableID(0) is returned.
272 * @param index the given index.
273 * @return the index-th available ID. index must be between 0
274 * and countAvailableIDs() - 1, inclusive. If index is out of
275 * range, the result of getAvailableID(0) is returned.
278 const UnicodeString
& getAvailableID(int32_t index
) const;
281 * Return the number of registered source specifiers.
282 * @return the number of registered source specifiers.
284 int32_t countAvailableSources(void) const;
287 * Return a registered source specifier.
288 * @param index which specifier to return, from 0 to n-1, where
289 * n = countAvailableSources()
290 * @param result fill-in paramter to receive the source specifier.
291 * If index is out of range, result will be empty.
292 * @return reference to result
294 UnicodeString
& getAvailableSource(int32_t index
,
295 UnicodeString
& result
) const;
298 * Return the number of registered target specifiers for a given
300 * @param source the given source specifier.
301 * @return the number of registered target specifiers for a given
304 int32_t countAvailableTargets(const UnicodeString
& source
) const;
307 * Return a registered target specifier for a given source.
308 * @param index which specifier to return, from 0 to n-1, where
309 * n = countAvailableTargets(source)
310 * @param source the source specifier
311 * @param result fill-in paramter to receive the target specifier.
312 * If source is invalid or if index is out of range, result will
314 * @return reference to result
316 UnicodeString
& getAvailableTarget(int32_t index
,
317 const UnicodeString
& source
,
318 UnicodeString
& result
) const;
321 * Return the number of registered variant specifiers for a given
322 * source-target pair. There is always at least one variant: If
323 * just source-target is registered, then the single variant
324 * NO_VARIANT is returned. If source-target/variant is registered
325 * then that variant is returned.
326 * @param source the source specifiers
327 * @param target the target specifiers
328 * @return the number of registered variant specifiers for a given
329 * source-target pair.
331 int32_t countAvailableVariants(const UnicodeString
& source
,
332 const UnicodeString
& target
) const;
335 * Return a registered variant specifier for a given source-target
336 * pair. If NO_VARIANT is one of the variants, then it will be
338 * @param index which specifier to return, from 0 to n-1, where
339 * n = countAvailableVariants(source, target)
340 * @param source the source specifier
341 * @param target the target specifier
342 * @param result fill-in paramter to receive the variant
343 * specifier. If source is invalid or if target is invalid or if
344 * index is out of range, result will be empty.
345 * @return reference to result
347 UnicodeString
& getAvailableVariant(int32_t index
,
348 const UnicodeString
& source
,
349 const UnicodeString
& target
,
350 UnicodeString
& result
) const;
354 //----------------------------------------------------------------
355 // Private implementation
356 //----------------------------------------------------------------
358 Entry
* find(const UnicodeString
& ID
);
360 Entry
* find(UnicodeString
& source
,
361 UnicodeString
& target
,
362 UnicodeString
& variant
);
// NOTE(review): the embedded line numbering jumps from 364 to 366 inside
// this parameter list, so a parameter line may be missing from this view.
// Confirm the full signature against the complete header.
364 Entry
* findInDynamicStore(const Spec
& src
,
366 const UnicodeString
& variant
) const;
// NOTE(review): the embedded line numbering jumps from 368 to 370 inside
// this parameter list, so a parameter line may be missing from this view.
// Confirm the full signature against the complete header.
368 Entry
* findInStaticStore(const Spec
& src
,
370 const UnicodeString
& variant
);
372 static Entry
* findInBundle(const Spec
& specToOpen
,
373 const Spec
& specToFind
,
374 const UnicodeString
& variant
,
375 UTransDirection direction
);
// NOTE(review): three registerEntry overloads follow. Each declaration is
// cut off in this view (it ends on a trailing comma with no closing `);`),
// so the remaining parameters are not visible here.

// Overload taking source / target / variant (truncated).
377 void registerEntry(const UnicodeString
& source
,
378 const UnicodeString
& target
,
379 const UnicodeString
& variant
,
// Overload taking a full ID (truncated).
383 void registerEntry(const UnicodeString
& ID
,
// Overload taking a full ID plus source / target / variant (truncated).
387 void registerEntry(const UnicodeString
& ID
,
388 const UnicodeString
& source
,
389 const UnicodeString
& target
,
390 const UnicodeString
& variant
,
394 void registerSTV(const UnicodeString
& source
,
395 const UnicodeString
& target
,
396 const UnicodeString
& variant
);
398 void removeSTV(const UnicodeString
& source
,
399 const UnicodeString
& target
,
400 const UnicodeString
& variant
);
// NOTE(review): the embedded line numbering jumps from 402 to 404 inside
// this parameter list and the declaration ends on a trailing comma, so
// parameters are missing from this view. Confirm against the complete
// header.
402 Transliterator
* instantiateEntry(const UnicodeString
& ID
,
404 TransliteratorAlias
*& aliasReturn
,
408 * A StringEnumeration over the registered IDs in this object.
410 class Enumeration
: public StringEnumeration
{
412 Enumeration(const TransliteratorRegistry
& reg
);
413 virtual ~Enumeration();
414 virtual int32_t count(UErrorCode
& status
) const;
415 virtual const UnicodeString
* snext(UErrorCode
& status
);
416 virtual void reset(UErrorCode
& status
);
417 static UClassID U_EXPORT2
getStaticClassID();
418 virtual UClassID
getDynamicClassID() const;
421 const TransliteratorRegistry
& reg
;
423 friend class Enumeration
;
428 * Dynamic registry mapping full IDs to Entry objects. This
429 * contains both public and internal entities. The visibility is
430 * controlled by whether an entry is listed in availableIDs and
436 * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
437 * target => (UVector: variant)) The UVector of variants is never
438 * empty. For a source-target with no variant, the special
439 * variant NO_VARIANT (the empty string) is stored in slot zero of
445 * Vector of public full IDs.
447 UVector availableIDs
;
449 TransliteratorRegistry(const TransliteratorRegistry
&other
); // forbid copying of this class
450 TransliteratorRegistry
&operator=(const TransliteratorRegistry
&other
); // forbid copying of this class
455 #endif /* #if !UCONFIG_NO_TRANSLITERATION */