]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/transreg.h
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / i18n / transreg.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
46f4442e 3* Copyright (c) 2001-2008, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 08/10/2001 aliu Creation.
8**********************************************************************
9*/
10#ifndef _TRANSREG_H
11#define _TRANSREG_H
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "unicode/uobject.h"
18#include "unicode/translit.h"
19#include "hash.h"
20#include "uvector.h"
21
22U_NAMESPACE_BEGIN
23
729e4ab9
A
24class TransliteratorEntry;
25class TransliteratorSpec;
b75a7d8f
A
26class UnicodeString;
27
28//------------------------------------------------------------------
29// TransliteratorAlias
30//------------------------------------------------------------------
31
32/**
33 * A TransliteratorAlias object is returned by get() if the given ID
34 * actually translates into something else. The caller then invokes
35 * the create() method on the alias to create the actual
36 * transliterator, and deletes the alias.
37 *
38 * Why all the shenanigans? To prevent circular calls between
39 * the registry code and the transliterator code that would deadlock.
40 */
41class TransliteratorAlias : public UMemory {
42 public:
43 /**
374ca955 44 * Construct a simple alias (type == SIMPLE)
b75a7d8f
A
45 * @param aliasID the given id.
46 */
73c04bcf 47 TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
374ca955 48
b75a7d8f 49 /**
374ca955 50 * Construct a compound RBT alias (type == COMPOUND)
b75a7d8f 51 */
73c04bcf
A
52 TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
53 UVector* adoptedTransliterators,
b75a7d8f
A
54 const UnicodeSet* compoundFilter);
55
374ca955
A
56 /**
57 * Construct a rules alias (type == RULES)
58 */
59 TransliteratorAlias(const UnicodeString& theID,
60 const UnicodeString& rules,
61 UTransDirection dir);
62
b75a7d8f 63 ~TransliteratorAlias();
374ca955 64
b75a7d8f
A
65 /**
66 * The whole point of create() is that the caller must invoke
67 * it when the registry mutex is NOT held, to prevent deadlock.
68 * It may only be called once.
374ca955
A
69 *
70 * Note: Only call create() if isRuleBased() returns FALSE.
71 *
72 * This method must be called *outside* of the TransliteratorRegistry
73 * mutex.
b75a7d8f
A
74 */
75 Transliterator* create(UParseError&, UErrorCode&);
374ca955
A
76
77 /**
78 * Return TRUE if this alias is rule-based. If so, the caller
79 * must call parse() on it, then call TransliteratorRegistry::reget().
80 */
81 UBool isRuleBased() const;
82
83 /**
84 * If isRuleBased() returns TRUE, then the caller must call this
85 * method, followed by TransliteratorRegistry::reget(). The latter
86 * method must be called inside the TransliteratorRegistry mutex.
87 *
88 * Note: Only call parse() if isRuleBased() returns TRUE.
89 *
90 * This method must be called *outside* of the TransliteratorRegistry
91 * mutex, because it can instantiate Transliterators embedded in
92 * the rules via the "&Latin-Arabic()" syntax.
93 */
94 void parse(TransliteratorParser& parser,
95 UParseError& pe, UErrorCode& ec) const;
96
b75a7d8f 97 private:
374ca955 98 // We actually come in three flavors:
b75a7d8f
A
99 // 1. Simple alias
100 // Here aliasesOrRules is the alias string. Everything else is
101 // null, zero, empty.
102 // 2. CompoundRBT
103 // Here ID is the ID, aliasesOrRules is the idBlocks, transes is the
104 // vector of contained RBTs (owned). NOTE(review): there is no idSplitPoint member any more; confirm how idBlocks marks
105 // where the contained RBTs go. compoundFilter is the
106 // compound filter, and it is _not_ owned.
374ca955
A
107 // 3. Rules
108 // Here ID is the ID, aliasesOrRules is the rules string.
109 // direction is the UTransDirection.
b75a7d8f 110 UnicodeString ID;
73c04bcf
A
111 UnicodeString aliasesOrRules;
112 UVector* transes; // owned
b75a7d8f 113 const UnicodeSet* compoundFilter; // alias
73c04bcf 114 UTransDirection direction;
374ca955 115 enum { SIMPLE, COMPOUND, RULES } type;
b75a7d8f
A
116
117 TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
118 TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
119};
120
121
122/**
123 * A registry of system transliterators. This is the data structure
124 * that implements the mapping between transliterator IDs and the data
125 * or function pointers used to create the corresponding
126 * transliterators. There is one instance of the registry that is
127 * created statically.
128 *
129 * The registry consists of a dynamic component -- a hashtable -- and
130 * a static component -- locale resource bundles. The dynamic store
131 * is semantically overlaid on the static store, so the static mapping
132 * can be dynamically overridden.
133 *
134 * This is an internal class that is only used by Transliterator.
135 * Transliterator maintains one static instance of this class and
136 * delegates all registry-related operations to it.
137 *
138 * @author Alan Liu
139 */
140class TransliteratorRegistry : public UMemory {
141
142 public:
143
144 /**
145 * Constructor
146 * @param status Output param set to success/failure code.
147 */
148 TransliteratorRegistry(UErrorCode& status);
149
150 /**
151 * Nonvirtual destructor -- this class is not subclassable.
152 */
153 ~TransliteratorRegistry();
154
155 //------------------------------------------------------------------
156 // Basic public API
157 //------------------------------------------------------------------
158
159 /**
160 * Given a simple ID (forward direction, no inline filter, not
161 * compound) attempt to instantiate it from the registry. Return
162 * 0 on failure.
163 *
164 * Return a non-NULL aliasReturn value if the ID points to an alias.
165 * We cannot instantiate it ourselves because the alias may contain
166 * filters or compounds, which we do not understand. Caller should
167 * make aliasReturn NULL before calling.
168 * @param ID the given ID
374ca955
A
169 * @param aliasReturn output param to receive TransliteratorAlias;
170 * should be NULL on entry
171 * Note: get() has no parseError parameter; information on the position
b75a7d8f
A
172 * of an error is received via the UParseError passed to TransliteratorAlias::create() or parse().
173 * @param status Output param set to success/failure code.
174 */
175 Transliterator* get(const UnicodeString& ID,
176 TransliteratorAlias*& aliasReturn,
b75a7d8f
A
177 UErrorCode& status);
178
374ca955
A
179 /**
180 * The caller must call this after calling get(), if [a] calling get()
181 * returns an alias, and [b] the alias is rule based. In that
182 * situation the caller must call alias->parse() to do the parsing
183 * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
184 * instantiating the transliterator.
185 *
186 * Note: Another alias might be returned by this method.
187 *
188 * This method (like all public methods of this class) must be called
189 * from within the TransliteratorRegistry mutex.
190 *
191 * @param aliasReturn output param to receive TransliteratorAlias;
192 * should be NULL on entry
193 */
194 Transliterator* reget(const UnicodeString& ID,
195 TransliteratorParser& parser,
196 TransliteratorAlias*& aliasReturn,
197 UErrorCode& status);
198
b75a7d8f
A
199 /**
200 * Register a prototype (adopted). This adds an entry to the
201 * dynamic store, or replaces an existing entry. Any entry in the
202 * underlying static locale resource store is masked.
203 */
204 void put(Transliterator* adoptedProto,
46f4442e
A
205 UBool visible,
206 UErrorCode& ec);
b75a7d8f
A
207
208 /**
209 * Register an ID and a factory function pointer. This adds an
210 * entry to the dynamic store, or replaces an existing entry. Any
211 * entry in the underlying static locale resource store is masked.
212 */
213 void put(const UnicodeString& ID,
214 Transliterator::Factory factory,
215 Transliterator::Token context,
46f4442e
A
216 UBool visible,
217 UErrorCode& ec);
b75a7d8f
A
218
219 /**
220 * Register an ID and a resource name. This adds an entry to the
221 * dynamic store, or replaces an existing entry. Any entry in the
222 * underlying static locale resource store is masked.
223 */
224 void put(const UnicodeString& ID,
225 const UnicodeString& resourceName,
226 UTransDirection dir,
73c04bcf 227 UBool readonlyResourceAlias,
46f4442e
A
228 UBool visible,
229 UErrorCode& ec);
b75a7d8f
A
230
231 /**
232 * Register an ID and an alias ID. This adds an entry to the
233 * dynamic store, or replaces an existing entry. Any entry in the
234 * underlying static locale resource store is masked.
235 */
236 void put(const UnicodeString& ID,
237 const UnicodeString& alias,
73c04bcf 238 UBool readonlyAliasAlias,
46f4442e
A
239 UBool visible,
240 UErrorCode& ec);
b75a7d8f
A
241
242 /**
243 * Unregister an ID. This removes an entry from the dynamic store
244 * if there is one. The static locale resource store is
245 * unaffected.
246 * @param ID the given ID.
247 */
248 void remove(const UnicodeString& ID);
249
250 //------------------------------------------------------------------
251 // Public ID and spec management
252 //------------------------------------------------------------------
253
254 /**
374ca955
A
255 * Return a StringEnumeration over the IDs currently registered
256 * with the system.
257 * @internal
258 */
259 StringEnumeration* getAvailableIDs() const;
260
261 /**
262 * == OBSOLETE - remove in ICU 3.4 ==
b75a7d8f
A
263 * Return the number of IDs currently registered with the system.
264 * To retrieve the actual IDs, call getAvailableID(i) with
265 * i from 0 to countAvailableIDs() - 1.
266 * @return the number of IDs currently registered with the system.
267 * @internal
268 */
374ca955 269 int32_t countAvailableIDs(void) const;
b75a7d8f
A
270
271 /**
374ca955 272 * == OBSOLETE - remove in ICU 3.4 ==
b75a7d8f
A
273 * Return the index-th available ID. index must be between 0
274 * and countAvailableIDs() - 1, inclusive. If index is out of
275 * range, the result of getAvailableID(0) is returned.
276 * @param index the given index.
277 * @return the index-th available ID. index must be between 0
278 * and countAvailableIDs() - 1, inclusive. If index is out of
279 * range, the result of getAvailableID(0) is returned.
280 * @internal
281 */
374ca955 282 const UnicodeString& getAvailableID(int32_t index) const;
b75a7d8f
A
283
284 /**
285 * Return the number of registered source specifiers.
286 * @return the number of registered source specifiers.
287 */
374ca955
A
288 int32_t countAvailableSources(void) const;
289
b75a7d8f
A
290 /**
291 * Return a registered source specifier.
292 * @param index which specifier to return, from 0 to n-1, where
293 * n = countAvailableSources()
294 * @param result fill-in parameter to receive the source specifier.
295 * If index is out of range, result will be empty.
296 * @return reference to result
297 */
298 UnicodeString& getAvailableSource(int32_t index,
374ca955
A
299 UnicodeString& result) const;
300
b75a7d8f
A
301 /**
302 * Return the number of registered target specifiers for a given
303 * source specifier.
304 * @param source the given source specifier.
305 * @return the number of registered target specifiers for a given
306 * source specifier.
307 */
374ca955
A
308 int32_t countAvailableTargets(const UnicodeString& source) const;
309
b75a7d8f
A
310 /**
311 * Return a registered target specifier for a given source.
312 * @param index which specifier to return, from 0 to n-1, where
313 * n = countAvailableTargets(source)
314 * @param source the source specifier
315 * @param result fill-in parameter to receive the target specifier.
316 * If source is invalid or if index is out of range, result will
317 * be empty.
318 * @return reference to result
319 */
320 UnicodeString& getAvailableTarget(int32_t index,
321 const UnicodeString& source,
374ca955
A
322 UnicodeString& result) const;
323
b75a7d8f
A
324 /**
325 * Return the number of registered variant specifiers for a given
326 * source-target pair. There is always at least one variant: If
327 * just source-target is registered, then the single variant
328 * NO_VARIANT is returned. If source-target/variant is registered
329 * then that variant is returned.
330 * @param source the source specifiers
331 * @param target the target specifiers
332 * @return the number of registered variant specifiers for a given
333 * source-target pair.
334 */
335 int32_t countAvailableVariants(const UnicodeString& source,
374ca955
A
336 const UnicodeString& target) const;
337
b75a7d8f
A
338 /**
339 * Return a registered variant specifier for a given source-target
340 * pair. If NO_VARIANT is one of the variants, then it will be
341 * at index 0.
342 * @param index which specifier to return, from 0 to n-1, where
343 * n = countAvailableVariants(source, target)
344 * @param source the source specifier
345 * @param target the target specifier
346 * @param result fill-in parameter to receive the variant
347 * specifier. If source is invalid or if target is invalid or if
348 * index is out of range, result will be empty.
349 * @return reference to result
350 */
351 UnicodeString& getAvailableVariant(int32_t index,
352 const UnicodeString& source,
353 const UnicodeString& target,
374ca955 354 UnicodeString& result) const;
b75a7d8f
A
355
356 private:
357
358 //----------------------------------------------------------------
359 // Private implementation
360 //----------------------------------------------------------------
361
729e4ab9 362 TransliteratorEntry* find(const UnicodeString& ID);
374ca955 363
729e4ab9 364 TransliteratorEntry* find(UnicodeString& source,
b75a7d8f
A
365 UnicodeString& target,
366 UnicodeString& variant);
367
729e4ab9
A
368 TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
369 const TransliteratorSpec& trg,
374ca955 370 const UnicodeString& variant) const;
b75a7d8f 371
729e4ab9
A
372 TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
373 const TransliteratorSpec& trg,
b75a7d8f
A
374 const UnicodeString& variant);
375
729e4ab9
A
376 static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
377 const TransliteratorSpec& specToFind,
b75a7d8f
A
378 const UnicodeString& variant,
379 UTransDirection direction);
380
381 void registerEntry(const UnicodeString& source,
382 const UnicodeString& target,
383 const UnicodeString& variant,
729e4ab9 384 TransliteratorEntry* adopted,
b75a7d8f
A
385 UBool visible);
386
387 void registerEntry(const UnicodeString& ID,
729e4ab9 388 TransliteratorEntry* adopted,
b75a7d8f
A
389 UBool visible);
390
374ca955 391 void registerEntry(const UnicodeString& ID,
b75a7d8f
A
392 const UnicodeString& source,
393 const UnicodeString& target,
394 const UnicodeString& variant,
729e4ab9 395 TransliteratorEntry* adopted,
b75a7d8f
A
396 UBool visible);
397
398 void registerSTV(const UnicodeString& source,
399 const UnicodeString& target,
400 const UnicodeString& variant);
401
402 void removeSTV(const UnicodeString& source,
403 const UnicodeString& target,
404 const UnicodeString& variant);
405
406 Transliterator* instantiateEntry(const UnicodeString& ID,
729e4ab9 407 TransliteratorEntry *entry,
b75a7d8f 408 TransliteratorAlias*& aliasReturn,
b75a7d8f
A
409 UErrorCode& status);
410
374ca955
A
411 /**
412 * A StringEnumeration over the registered IDs in this object.
413 */
414 class Enumeration : public StringEnumeration {
415 public:
416 Enumeration(const TransliteratorRegistry& reg);
417 virtual ~Enumeration();
418 virtual int32_t count(UErrorCode& status) const;
419 virtual const UnicodeString* snext(UErrorCode& status);
420 virtual void reset(UErrorCode& status);
421 static UClassID U_EXPORT2 getStaticClassID();
422 virtual UClassID getDynamicClassID() const;
423 private:
424 int32_t index;
425 const TransliteratorRegistry& reg;
426 };
427 friend class Enumeration;
428
b75a7d8f
A
429 private:
430
431 /**
432 * Dynamic registry mapping full IDs to Entry objects. This
433 * contains both public and internal entities. The visibility is
434 * controlled by whether an entry is listed in availableIDs and
435 * specDAG or not.
436 */
437 Hashtable registry;
374ca955 438
b75a7d8f
A
439 /**
440 * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
441 * target => (UVector: variant)) The UVector of variants is never
442 * empty. For a source-target with no variant, the special
443 * variant NO_VARIANT (the empty string) is stored in slot zero of
444 * the UVector.
445 */
446 Hashtable specDAG;
374ca955 447
b75a7d8f
A
448 /**
449 * Vector of public full IDs.
450 */
451 UVector availableIDs;
452
453 TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
454 TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
455};
456
457U_NAMESPACE_END
458
459#endif /* #if !UCONFIG_NO_TRANSLITERATION */
460
461#endif
462//eof