]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
46f4442e | 3 | * Copyright (c) 2001-2008, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * Date Name Description | |
7 | * 08/10/2001 aliu Creation. | |
8 | ********************************************************************** | |
9 | */ | |
10 | #ifndef _TRANSREG_H | |
11 | #define _TRANSREG_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_TRANSLITERATION | |
16 | ||
17 | #include "unicode/uobject.h" | |
18 | #include "unicode/translit.h" | |
19 | #include "hash.h" | |
20 | #include "uvector.h" | |
21 | ||
22 | U_NAMESPACE_BEGIN | |
23 | ||
729e4ab9 A |
24 | class TransliteratorEntry; |
25 | class TransliteratorSpec; | |
b75a7d8f A |
26 | class UnicodeString; |
27 | ||
28 | //------------------------------------------------------------------ | |
29 | // TransliteratorAlias | |
30 | //------------------------------------------------------------------ | |
31 | ||
32 | /** | |
33 | * A TransliteratorAlias object is returned by get() if the given ID | |
34 | * actually translates into something else. The caller then invokes | |
35 | * the create() method on the alias to create the actual | |
36 | * transliterator, and deletes the alias. | |
37 | * | |
38 | * Why all the shenanigans? To prevent circular calls between | |
39 | * the registry code and the transliterator code, which would deadlock. | |
40 | */ | |
41 | class TransliteratorAlias : public UMemory { | |
42 | public: | |
43 | /** | |
374ca955 | 44 | * Construct a simple alias (type == SIMPLE)
b75a7d8f A |
45 | * @param aliasID the given id. |
46 | */ | |
73c04bcf | 47 | TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
374ca955 | 48 |
b75a7d8f | 49 | /**
374ca955 | 50 | * Construct a compound RBT alias (type == COMPOUND)
b75a7d8f | 51 | */
73c04bcf A |
52 | TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
53 | UVector* adoptedTransliterators, | |
b75a7d8f A |
54 | const UnicodeSet* compoundFilter); |
55 | ||
374ca955 A |
56 | /** |
57 | * Construct a rules alias (type = RULES) | |
58 | */ | |
59 | TransliteratorAlias(const UnicodeString& theID, | |
60 | const UnicodeString& rules, | |
61 | UTransDirection dir); | |
62 | ||
b75a7d8f | 63 | ~TransliteratorAlias();
374ca955 | 64 |
b75a7d8f A |
65 | /** |
66 | * The whole point of create() is that the caller must invoke | |
67 | * it when the registry mutex is NOT held, to prevent deadlock. | |
68 | * It may only be called once. | |
374ca955 A |
69 | * |
70 | * Note: Only call create() if isRuleBased() returns FALSE. | |
71 | * | |
72 | * This method must be called *outside* of the TransliteratorRegistry | |
73 | * mutex. | |
b75a7d8f A |
74 | */ |
75 | Transliterator* create(UParseError&, UErrorCode&); | |
374ca955 A |
76 | |
77 | /** | |
78 | * Return TRUE if this alias is rule-based. If so, the caller | |
79 | * must call parse() on it, then call TransliteratorRegistry::reget(). | |
80 | */ | |
81 | UBool isRuleBased() const; | |
82 | ||
83 | /** | |
84 | * If isRuleBased() returns TRUE, then the caller must call this | |
85 | * method, followed by TransliteratorRegistry::reget(). The latter | |
86 | * method must be called inside the TransliteratorRegistry mutex. | |
87 | * | |
88 | * Note: Only call parse() if isRuleBased() returns TRUE. | |
89 | * | |
90 | * This method must be called *outside* of the TransliteratorRegistry | |
91 | * mutex, because it can instantiate Transliterators embedded in | |
92 | * the rules via the "&Latin-Arabic()" syntax. | |
93 | */ | |
94 | void parse(TransliteratorParser& parser, | |
95 | UParseError& pe, UErrorCode& ec) const; | |
96 | ||
b75a7d8f | 97 | private:
374ca955 | 98 | // We actually come in three flavors:
b75a7d8f A |
99 | // 1. Simple alias |
100 | // Here aliasesOrRules presumably holds the alias ID string. | |
101 | // compoundFilter may also be supplied (not owned). | |
102 | // 2. CompoundRBT | |
103 | // Here ID is the ID, aliasesOrRules presumably holds the | |
104 | // idBlocks, and transes is the adopted vector of contained | |
105 | // transliterators. compoundFilter is the | |
106 | // compound filter, and it is _not_ owned. | |
374ca955 A |
107 | // 3. Rules |
108 | // Here ID is the ID, aliasesOrRules presumably holds the rules | |
109 | // string, and direction is the UTransDirection. | |
b75a7d8f | 110 | UnicodeString ID;
73c04bcf A |
111 | UnicodeString aliasesOrRules; |
112 | UVector* transes; // owned | |
b75a7d8f | 113 | const UnicodeSet* compoundFilter; // alias
73c04bcf | 114 | UTransDirection direction;
374ca955 | 115 | enum { SIMPLE, COMPOUND, RULES } type;
b75a7d8f A |
116 | |
117 | TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class | |
118 | TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class | |
119 | }; | |
120 | ||
121 | ||
122 | /** | |
123 | * A registry of system transliterators. This is the data structure | |
124 | * that implements the mapping between transliterator IDs and the data | |
125 | * or function pointers used to create the corresponding | |
126 | * transliterators. There is one instance of the registry that is | |
127 | * created statically. | |
128 | * | |
129 | * The registry consists of a dynamic component -- a hashtable -- and | |
130 | * a static component -- locale resource bundles. The dynamic store | |
131 | * is semantically overlaid on the static store, so the static mapping | |
132 | * can be dynamically overridden. | |
133 | * | |
134 | * This is an internal class that is only used by Transliterator. | |
135 | * Transliterator maintains one static instance of this class and | |
136 | * delegates all registry-related operations to it. | |
137 | * | |
138 | * @author Alan Liu | |
139 | */ | |
140 | class TransliteratorRegistry : public UMemory { | |
141 | ||
142 | public: | |
143 | ||
144 | /** | |
145 | * Constructor | |
146 | * @param status Output param set to success/failure code. | |
147 | */ | |
148 | TransliteratorRegistry(UErrorCode& status); | |
149 | ||
150 | /** | |
151 | * Nonvirtual destructor -- this class is not subclassable. | |
152 | */ | |
153 | ~TransliteratorRegistry(); | |
154 | ||
155 | //------------------------------------------------------------------ | |
156 | // Basic public API | |
157 | //------------------------------------------------------------------ | |
158 | ||
159 | /** | |
160 | * Given a simple ID (forward direction, no inline filter, not | |
161 | * compound) attempt to instantiate it from the registry. Return | |
162 | * 0 on failure. | |
163 | * | |
164 | * Return a non-NULL aliasReturn value if the ID points to an alias. | |
165 | * We cannot instantiate it ourselves because the alias may contain | |
166 | * filters or compounds, which we do not understand. Caller should | |
167 | * make aliasReturn NULL before calling. | |
168 | * @param ID the given ID | |
374ca955 A |
169 | * @param aliasReturn output param to receive TransliteratorAlias; |
170 | * should be NULL on entry | |
171 | * @param parseError (stale -- not in the signature below) Struct to | |
b75a7d8f A |
172 | * receive information on position of error if an error is encountered |
173 | * @param status Output param set to success/failure code. | |
174 | */ | |
175 | Transliterator* get(const UnicodeString& ID, | |
176 | TransliteratorAlias*& aliasReturn, | |
b75a7d8f A |
177 | UErrorCode& status); |
178 | ||
374ca955 A |
179 | /** |
180 | * The caller must call this after calling get(), if [a] calling get() | |
181 | * returns an alias, and [b] the alias is rule based. In that | |
182 | * situation the caller must call alias->parse() to do the parsing | |
183 | * OUTSIDE THE REGISTRY MUTEX, then call this method to retry | |
184 | * instantiating the transliterator. | |
185 | * | |
186 | * Note: Another alias might be returned by this method. | |
187 | * | |
188 | * This method (like all public methods of this class) must be called | |
189 | * from within the TransliteratorRegistry mutex. | |
190 | * | |
191 | * @param aliasReturn output param to receive TransliteratorAlias; | |
192 | * should be NULL on entry | |
193 | */ | |
194 | Transliterator* reget(const UnicodeString& ID, | |
195 | TransliteratorParser& parser, | |
196 | TransliteratorAlias*& aliasReturn, | |
197 | UErrorCode& status); | |
198 | ||
b75a7d8f A |
199 | /** |
200 | * Register a prototype (adopted). This adds an entry to the | |
201 | * dynamic store, or replaces an existing entry. Any entry in the | |
202 | * underlying static locale resource store is masked. | |
203 | */ | |
204 | void put(Transliterator* adoptedProto, | |
46f4442e A |
205 | UBool visible, |
206 | UErrorCode& ec); | |
b75a7d8f A |
207 | |
208 | /** | |
209 | * Register an ID and a factory function pointer. This adds an | |
210 | * entry to the dynamic store, or replaces an existing entry. Any | |
211 | * entry in the underlying static locale resource store is masked. | |
212 | */ | |
213 | void put(const UnicodeString& ID, | |
214 | Transliterator::Factory factory, | |
215 | Transliterator::Token context, | |
46f4442e A |
216 | UBool visible, |
217 | UErrorCode& ec); | |
b75a7d8f A |
218 | |
219 | /** | |
220 | * Register an ID and a resource name. This adds an entry to the | |
221 | * dynamic store, or replaces an existing entry. Any entry in the | |
222 | * underlying static locale resource store is masked. | |
223 | */ | |
224 | void put(const UnicodeString& ID, | |
225 | const UnicodeString& resourceName, | |
226 | UTransDirection dir, | |
73c04bcf | 227 | UBool readonlyResourceAlias,
46f4442e A |
228 | UBool visible, |
229 | UErrorCode& ec); | |
b75a7d8f A |
230 | |
231 | /** | |
232 | * Register an ID and an alias ID. This adds an entry to the | |
233 | * dynamic store, or replaces an existing entry. Any entry in the | |
234 | * underlying static locale resource store is masked. | |
235 | */ | |
236 | void put(const UnicodeString& ID, | |
237 | const UnicodeString& alias, | |
73c04bcf | 238 | UBool readonlyAliasAlias,
46f4442e A |
239 | UBool visible, |
240 | UErrorCode& ec); | |
b75a7d8f A |
241 | |
242 | /** | |
243 | * Unregister an ID. This removes an entry from the dynamic store | |
244 | * if there is one. The static locale resource store is | |
245 | * unaffected. | |
246 | * @param ID the given ID. | |
247 | */ | |
248 | void remove(const UnicodeString& ID); | |
249 | ||
250 | //------------------------------------------------------------------ | |
251 | // Public ID and spec management | |
252 | //------------------------------------------------------------------ | |
253 | ||
254 | /** | |
374ca955 A |
255 | * Return a StringEnumeration over the IDs currently registered |
256 | * with the system. | |
257 | * @internal | |
258 | */ | |
259 | StringEnumeration* getAvailableIDs() const; | |
260 | ||
261 | /** | |
262 | * == OBSOLETE - remove in ICU 3.4 == | |
b75a7d8f A |
263 | * Return the number of IDs currently registered with the system. |
264 | * To retrieve the actual IDs, call getAvailableID(i) with | |
265 | * i from 0 to countAvailableIDs() - 1. | |
266 | * @return the number of IDs currently registered with the system. | |
267 | * @internal | |
268 | */ | |
374ca955 | 269 | int32_t countAvailableIDs(void) const;
b75a7d8f A |
270 | |
271 | /** | |
374ca955 | 272 | * == OBSOLETE - remove in ICU 3.4 ==
b75a7d8f A |
273 | * Return the index-th available ID. index must be between 0 |
274 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
275 | * range, the result of getAvailableID(0) is returned. | |
276 | * @param index the given index. | |
277 | * @return the index-th available ID. index must be between 0 | |
278 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
279 | * range, the result of getAvailableID(0) is returned. | |
280 | * @internal | |
281 | */ | |
374ca955 | 282 | const UnicodeString& getAvailableID(int32_t index) const;
b75a7d8f A |
283 | |
284 | /** | |
285 | * Return the number of registered source specifiers. | |
286 | * @return the number of registered source specifiers. | |
287 | */ | |
374ca955 A |
288 | int32_t countAvailableSources(void) const; |
289 | ||
b75a7d8f A |
290 | /** |
291 | * Return a registered source specifier. | |
292 | * @param index which specifier to return, from 0 to n-1, where | |
293 | * n = countAvailableSources() | |
294 | * @param result fill-in parameter to receive the source specifier. | |
295 | * If index is out of range, result will be empty. | |
296 | * @return reference to result | |
297 | */ | |
298 | UnicodeString& getAvailableSource(int32_t index, | |
374ca955 A |
299 | UnicodeString& result) const; |
300 | ||
b75a7d8f A |
301 | /** |
302 | * Return the number of registered target specifiers for a given | |
303 | * source specifier. | |
304 | * @param source the given source specifier. | |
305 | * @return the number of registered target specifiers for a given | |
306 | * source specifier. | |
307 | */ | |
374ca955 A |
308 | int32_t countAvailableTargets(const UnicodeString& source) const; |
309 | ||
b75a7d8f A |
310 | /** |
311 | * Return a registered target specifier for a given source. | |
312 | * @param index which specifier to return, from 0 to n-1, where | |
313 | * n = countAvailableTargets(source) | |
314 | * @param source the source specifier | |
315 | * @param result fill-in parameter to receive the target specifier. | |
316 | * If source is invalid or if index is out of range, result will | |
317 | * be empty. | |
318 | * @return reference to result | |
319 | */ | |
320 | UnicodeString& getAvailableTarget(int32_t index, | |
321 | const UnicodeString& source, | |
374ca955 A |
322 | UnicodeString& result) const; |
323 | ||
b75a7d8f A |
324 | /** |
325 | * Return the number of registered variant specifiers for a given | |
326 | * source-target pair. There is always at least one variant: If | |
327 | * just source-target is registered, then the single variant | |
328 | * NO_VARIANT is returned. If source-target/variant is registered | |
329 | * then that variant is returned. | |
330 | * @param source the source specifiers | |
331 | * @param target the target specifiers | |
332 | * @return the number of registered variant specifiers for a given | |
333 | * source-target pair. | |
334 | */ | |
335 | int32_t countAvailableVariants(const UnicodeString& source, | |
374ca955 A |
336 | const UnicodeString& target) const; |
337 | ||
b75a7d8f A |
338 | /** |
339 | * Return a registered variant specifier for a given source-target | |
340 | * pair. If NO_VARIANT is one of the variants, then it will be | |
341 | * at index 0. | |
342 | * @param index which specifier to return, from 0 to n-1, where | |
343 | * n = countAvailableVariants(source, target) | |
344 | * @param source the source specifier | |
345 | * @param target the target specifier | |
346 | * @param result fill-in parameter to receive the variant | |
347 | * specifier. If source is invalid or if target is invalid or if | |
348 | * index is out of range, result will be empty. | |
349 | * @return reference to result | |
350 | */ | |
351 | UnicodeString& getAvailableVariant(int32_t index, | |
352 | const UnicodeString& source, | |
353 | const UnicodeString& target, | |
374ca955 | 354 | UnicodeString& result) const;
b75a7d8f A |
355 | |
356 | private: | |
357 | ||
358 | //---------------------------------------------------------------- | |
359 | // Private implementation | |
360 | //---------------------------------------------------------------- | |
361 | ||
729e4ab9 | 362 | TransliteratorEntry* find(const UnicodeString& ID);
374ca955 | 363 |
729e4ab9 | 364 | TransliteratorEntry* find(UnicodeString& source,
b75a7d8f A |
365 | UnicodeString& target, |
366 | UnicodeString& variant); | |
367 | ||
729e4ab9 A |
368 | TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, |
369 | const TransliteratorSpec& trg, | |
374ca955 | 370 | const UnicodeString& variant) const;
b75a7d8f | 371 |
729e4ab9 A |
372 | TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, |
373 | const TransliteratorSpec& trg, | |
b75a7d8f A |
374 | const UnicodeString& variant); |
375 | ||
729e4ab9 A |
376 | static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, |
377 | const TransliteratorSpec& specToFind, | |
b75a7d8f A |
378 | const UnicodeString& variant, |
379 | UTransDirection direction); | |
380 | ||
381 | void registerEntry(const UnicodeString& source, | |
382 | const UnicodeString& target, | |
383 | const UnicodeString& variant, | |
729e4ab9 | 384 | TransliteratorEntry* adopted,
b75a7d8f A |
385 | UBool visible); |
386 | ||
387 | void registerEntry(const UnicodeString& ID, | |
729e4ab9 | 388 | TransliteratorEntry* adopted,
b75a7d8f A |
389 | UBool visible); |
390 | ||
374ca955 | 391 | void registerEntry(const UnicodeString& ID,
b75a7d8f A |
392 | const UnicodeString& source, |
393 | const UnicodeString& target, | |
394 | const UnicodeString& variant, | |
729e4ab9 | 395 | TransliteratorEntry* adopted,
b75a7d8f A |
396 | UBool visible); |
397 | ||
398 | void registerSTV(const UnicodeString& source, | |
399 | const UnicodeString& target, | |
400 | const UnicodeString& variant); | |
401 | ||
402 | void removeSTV(const UnicodeString& source, | |
403 | const UnicodeString& target, | |
404 | const UnicodeString& variant); | |
405 | ||
406 | Transliterator* instantiateEntry(const UnicodeString& ID, | |
729e4ab9 | 407 | TransliteratorEntry *entry,
b75a7d8f | 408 | TransliteratorAlias*& aliasReturn,
b75a7d8f A |
409 | UErrorCode& status); |
410 | ||
374ca955 A |
411 | /** |
412 | * A StringEnumeration over the registered IDs in this object. | |
413 | */ | |
414 | class Enumeration : public StringEnumeration { | |
415 | public: | |
416 | Enumeration(const TransliteratorRegistry& reg); | |
417 | virtual ~Enumeration(); | |
418 | virtual int32_t count(UErrorCode& status) const; | |
419 | virtual const UnicodeString* snext(UErrorCode& status); | |
420 | virtual void reset(UErrorCode& status); | |
421 | static UClassID U_EXPORT2 getStaticClassID(); | |
422 | virtual UClassID getDynamicClassID() const; | |
423 | private: | |
424 | int32_t index; | |
425 | const TransliteratorRegistry& reg; | |
426 | }; | |
427 | friend class Enumeration; | |
428 | ||
b75a7d8f A |
429 | private: |
430 | ||
431 | /** | |
432 | * Dynamic registry mapping full IDs to Entry objects. This | |
433 | * contains both public and internal entities. The visibility is | |
434 | * controlled by whether an entry is listed in availableIDs and | |
435 | * specDAG or not. | |
436 | */ | |
437 | Hashtable registry; | |
374ca955 | 438 |
b75a7d8f A |
439 | /** |
440 | * DAG of visible IDs by spec. Hashtable: source => (Hashtable: | |
441 | * target => (UVector: variant)) The UVector of variants is never | |
442 | * empty. For a source-target with no variant, the special | |
443 | * variant NO_VARIANT (the empty string) is stored in slot zero of | |
444 | * the UVector. | |
445 | */ | |
446 | Hashtable specDAG; | |
374ca955 | 447 |
b75a7d8f A |
448 | /** |
449 | * Vector of public full IDs. | |
450 | */ | |
451 | UVector availableIDs; | |
452 | ||
453 | TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class | |
454 | TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class | |
455 | }; | |
456 | ||
457 | U_NAMESPACE_END | |
458 | ||
459 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
460 | ||
461 | #endif | |
462 | //eof |