]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
374ca955 | 3 | * Copyright (c) 2001-2004, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * Date Name Description | |
7 | * 08/10/2001 aliu Creation. | |
8 | ********************************************************************** | |
9 | */ | |
10 | #ifndef _TRANSREG_H | |
11 | #define _TRANSREG_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_TRANSLITERATION | |
16 | ||
17 | #include "unicode/uobject.h" | |
18 | #include "unicode/translit.h" | |
19 | #include "hash.h" | |
20 | #include "uvector.h" | |
21 | ||
22 | U_NAMESPACE_BEGIN | |
23 | ||
24 | class Entry; | |
25 | class Spec; | |
26 | class UnicodeString; | |
27 | ||
28 | //------------------------------------------------------------------ | |
29 | // TransliteratorAlias | |
30 | //------------------------------------------------------------------ | |
31 | ||
32 | /** |
33 | * A TransliteratorAlias object is returned by get() if the given ID | |
34 | * actually translates into something else. The caller then invokes | |
35 | * the create() method on the alias to create the actual | |
36 | * transliterator, and deletes the alias. | |
37 | * | |
38 | * Why all the shenanigans? To prevent circular calls between | |
39 | * the registry code and the transliterator code, which would deadlock. | |
40 | */ | |
41 | class TransliteratorAlias : public UMemory { | |
42 | public: | |
43 | /** | |
374ca955 | 44 | * Construct a simple alias (type == SIMPLE) |
b75a7d8f A |
45 | * @param aliasID the given id. |
46 | */ | |
47 | TransliteratorAlias(const UnicodeString& aliasID); | |
374ca955 | 48 | |
b75a7d8f | 49 | /** |
374ca955 | 50 | * Construct a compound RBT alias (type == COMPOUND) |
b75a7d8f A |
51 | */ |
52 | TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlock, | |
53 | Transliterator* adopted, int32_t idSplitPoint, | |
54 | const UnicodeSet* compoundFilter); | |
55 | ||
374ca955 A |
56 | /** |
57 | * Construct a rules alias (type == RULES) | |
58 | */ | |
59 | TransliteratorAlias(const UnicodeString& theID, | |
60 | const UnicodeString& rules, | |
61 | UTransDirection dir); | |
62 | ||
b75a7d8f | 63 | ~TransliteratorAlias(); |
374ca955 | 64 | |
b75a7d8f A |
65 | /** |
66 | * The whole point of create() is that the caller must invoke | |
67 | * it when the registry mutex is NOT held, to prevent deadlock. | |
68 | * It may only be called once. | |
374ca955 A |
69 | * |
70 | * Note: Only call create() if isRuleBased() returns FALSE. | |
71 | * | |
72 | * This method must be called *outside* of the TransliteratorRegistry | |
73 | * mutex. | |
b75a7d8f A |
74 | */ |
75 | Transliterator* create(UParseError&, UErrorCode&); | |
374ca955 A |
76 | |
77 | /** | |
78 | * Return TRUE if this alias is rule-based. If so, the caller | |
79 | * must call parse() on it, then call TransliteratorRegistry::reget(). | |
80 | */ | |
81 | UBool isRuleBased() const; | |
82 | ||
83 | /** | |
84 | * If isRuleBased() returns TRUE, then the caller must call this | |
85 | * method, followed by TransliteratorRegistry::reget(). The latter | |
86 | * method must be called inside the TransliteratorRegistry mutex. | |
87 | * | |
88 | * Note: Only call parse() if isRuleBased() returns TRUE. | |
89 | * | |
90 | * This method must be called *outside* of the TransliteratorRegistry | |
91 | * mutex, because it can instantiate Transliterators embedded in | |
92 | * the rules via the "&Latin-Arabic()" syntax. | |
93 | */ | |
94 | void parse(TransliteratorParser& parser, | |
95 | UParseError& pe, UErrorCode& ec) const; | |
96 | ||
b75a7d8f | 97 | private: |
374ca955 | 98 | // We actually come in three flavors: |
b75a7d8f A |
99 | // 1. Simple alias |
100 | // Here aliasID is the alias string. Everything else is | |
101 | // null, zero, empty. | |
102 | // 2. CompoundRBT | |
103 | // Here ID is the ID, aliasID is the idBlock, trans is the | |
104 | // contained RBT, and idSplitPoint is the offset in aliasID | |
105 | // where the contained RBT goes. compoundFilter is the | |
106 | // compound filter, and it is _not_ owned. | |
374ca955 A |
107 | // 3. Rules |
108 | // Here ID is the ID, aliasID is the rules string. | |
109 | // idSplitPoint is the UTransDirection. | |
b75a7d8f | 110 | UnicodeString ID; |
374ca955 | 111 | UnicodeString aliasID; // rename! holds rules for RULES type |
b75a7d8f A |
112 | Transliterator* trans; // owned |
113 | const UnicodeSet* compoundFilter; // alias | |
374ca955 A |
114 | int32_t idSplitPoint; // rename! holds UTransDirection for RULES type |
115 | enum { SIMPLE, COMPOUND, RULES } type; | |
b75a7d8f A |
116 | |
117 | TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class | |
118 | TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class | |
119 | }; | |
120 | ||
121 | ||
122 | /** | |
123 | * A registry of system transliterators. This is the data structure | |
124 | * that implements the mapping between transliterator IDs and the data | |
125 | * or function pointers used to create the corresponding | |
126 | * transliterators. There is one instance of the registry that is | |
127 | * created statically. | |
128 | * | |
129 | * The registry consists of a dynamic component -- a hashtable -- and | |
130 | * a static component -- locale resource bundles. The dynamic store | |
131 | * is semantically overlaid on the static store, so the static mapping | |
132 | * can be dynamically overridden. | |
133 | * | |
134 | * This is an internal class that is only used by Transliterator. | |
135 | * Transliterator maintains one static instance of this class and | |
136 | * delegates all registry-related operations to it. | |
137 | * | |
138 | * @author Alan Liu | |
139 | */ | |
140 | class TransliteratorRegistry : public UMemory { | |
141 | ||
142 | public: | |
143 | ||
144 | /** | |
145 | * Constructor | |
146 | * @param status Output param set to success/failure code. | |
147 | */ | |
148 | TransliteratorRegistry(UErrorCode& status); | |
149 | ||
150 | /** | |
151 | * Nonvirtual destructor -- this class is not subclassable. | |
152 | */ | |
153 | ~TransliteratorRegistry(); | |
154 | ||
155 | //------------------------------------------------------------------ | |
156 | // Basic public API | |
157 | //------------------------------------------------------------------ | |
158 | ||
159 | /** | |
160 | * Given a simple ID (forward direction, no inline filter, not | |
161 | * compound) attempt to instantiate it from the registry. Return | |
162 | * 0 on failure. | |
163 | * | |
164 | * Return a non-NULL aliasReturn value if the ID points to an alias. | |
165 | * We cannot instantiate it ourselves because the alias may contain | |
166 | * filters or compounds, which we do not understand. Caller should | |
167 | * make aliasReturn NULL before calling. | |
168 | * @param ID the given ID | |
374ca955 A |
169 | * @param aliasReturn output param to receive TransliteratorAlias; |
170 | * should be NULL on entry | |
171 | * NOTE(review): this signature has no parseError parameter; the | |
b75a7d8f A |
172 | * "@param parseError" entry formerly listed here did not match it. |
173 | * @param status Output param set to success/failure code. | |
174 | */ | |
175 | Transliterator* get(const UnicodeString& ID, | |
176 | TransliteratorAlias*& aliasReturn, | |
b75a7d8f A |
177 | UErrorCode& status); |
178 | ||
374ca955 A |
179 | /** |
180 | * The caller must call this after calling get(), if [a] calling get() | |
181 | * returns an alias, and [b] the alias is rule based. In that | |
182 | * situation the caller must call alias->parse() to do the parsing | |
183 | * OUTSIDE THE REGISTRY MUTEX, then call this method to retry | |
184 | * instantiating the transliterator. | |
185 | * | |
186 | * Note: Another alias might be returned by this method. | |
187 | * | |
188 | * This method (like all public methods of this class) must be called | |
189 | * from within the TransliteratorRegistry mutex. | |
190 | * | |
191 | * @param aliasReturn output param to receive TransliteratorAlias; | |
192 | * should be NULL on entry | |
193 | */ | |
194 | Transliterator* reget(const UnicodeString& ID, | |
195 | TransliteratorParser& parser, | |
196 | TransliteratorAlias*& aliasReturn, | |
197 | UErrorCode& status); | |
198 | ||
b75a7d8f A |
199 | /** |
200 | * Register a prototype (adopted). This adds an entry to the | |
201 | * dynamic store, or replaces an existing entry. Any entry in the | |
202 | * underlying static locale resource store is masked. | |
203 | */ | |
204 | void put(Transliterator* adoptedProto, | |
205 | UBool visible); | |
206 | ||
207 | /** | |
208 | * Register an ID and a factory function pointer. This adds an | |
209 | * entry to the dynamic store, or replaces an existing entry. Any | |
210 | * entry in the underlying static locale resource store is masked. | |
211 | */ | |
212 | void put(const UnicodeString& ID, | |
213 | Transliterator::Factory factory, | |
214 | Transliterator::Token context, | |
215 | UBool visible); | |
216 | ||
217 | /** | |
218 | * Register an ID and a resource name. This adds an entry to the | |
219 | * dynamic store, or replaces an existing entry. Any entry in the | |
220 | * underlying static locale resource store is masked. | |
221 | */ | |
222 | void put(const UnicodeString& ID, | |
223 | const UnicodeString& resourceName, | |
224 | UTransDirection dir, | |
225 | UBool visible); | |
226 | ||
227 | /** | |
228 | * Register an ID and an alias ID. This adds an entry to the | |
229 | * dynamic store, or replaces an existing entry. Any entry in the | |
230 | * underlying static locale resource store is masked. | |
231 | */ | |
232 | void put(const UnicodeString& ID, | |
233 | const UnicodeString& alias, | |
234 | UBool visible); | |
235 | ||
236 | /** | |
237 | * Unregister an ID. This removes an entry from the dynamic store | |
238 | * if there is one. The static locale resource store is | |
239 | * unaffected. | |
240 | * @param ID the given ID. | |
241 | */ | |
242 | void remove(const UnicodeString& ID); | |
243 | ||
244 | //------------------------------------------------------------------ | |
245 | // Public ID and spec management | |
246 | //------------------------------------------------------------------ | |
247 | ||
248 | /** | |
374ca955 A |
249 | * Return a StringEnumeration over the IDs currently registered |
250 | * with the system. | |
251 | * @internal | |
252 | */ | |
253 | StringEnumeration* getAvailableIDs() const; | |
254 | ||
255 | /** | |
256 | * == OBSOLETE - remove in ICU 3.4 == | |
b75a7d8f A |
257 | * Return the number of IDs currently registered with the system. |
258 | * To retrieve the actual IDs, call getAvailableID(i) with | |
259 | * i from 0 to countAvailableIDs() - 1. | |
260 | * @return the number of IDs currently registered with the system. | |
261 | * @internal | |
262 | */ | |
374ca955 | 263 | int32_t countAvailableIDs(void) const; |
b75a7d8f A |
264 | |
265 | /** | |
374ca955 | 266 | * == OBSOLETE - remove in ICU 3.4 == |
b75a7d8f A |
267 | * Return the index-th available ID. index must be between 0 |
268 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
269 | * range, the result of getAvailableID(0) is returned. | |
270 | * @param index the given index. | |
271 | * @return the index-th available ID. index must be between 0 | |
272 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
273 | * range, the result of getAvailableID(0) is returned. | |
274 | * @internal | |
275 | */ | |
374ca955 | 276 | const UnicodeString& getAvailableID(int32_t index) const; |
b75a7d8f A |
277 | |
278 | /** | |
279 | * Return the number of registered source specifiers. | |
280 | * @return the number of registered source specifiers. | |
281 | */ | |
374ca955 A |
282 | int32_t countAvailableSources(void) const; |
283 | ||
b75a7d8f A |
284 | /** |
285 | * Return a registered source specifier. | |
286 | * @param index which specifier to return, from 0 to n-1, where | |
287 | * n = countAvailableSources() | |
288 | * @param result fill-in parameter to receive the source specifier. | |
289 | * If index is out of range, result will be empty. | |
290 | * @return reference to result | |
291 | */ | |
292 | UnicodeString& getAvailableSource(int32_t index, | |
374ca955 A |
293 | UnicodeString& result) const; |
294 | ||
b75a7d8f A |
295 | /** |
296 | * Return the number of registered target specifiers for a given | |
297 | * source specifier. | |
298 | * @param source the given source specifier. | |
299 | * @return the number of registered target specifiers for a given | |
300 | * source specifier. | |
301 | */ | |
374ca955 A |
302 | int32_t countAvailableTargets(const UnicodeString& source) const; |
303 | ||
b75a7d8f A |
304 | /** |
305 | * Return a registered target specifier for a given source. | |
306 | * @param index which specifier to return, from 0 to n-1, where | |
307 | * n = countAvailableTargets(source) | |
308 | * @param source the source specifier | |
309 | * @param result fill-in parameter to receive the target specifier. | |
310 | * If source is invalid or if index is out of range, result will | |
311 | * be empty. | |
312 | * @return reference to result | |
313 | */ | |
314 | UnicodeString& getAvailableTarget(int32_t index, | |
315 | const UnicodeString& source, | |
374ca955 A |
316 | UnicodeString& result) const; |
317 | ||
b75a7d8f A |
318 | /** |
319 | * Return the number of registered variant specifiers for a given | |
320 | * source-target pair. There is always at least one variant: If | |
321 | * just source-target is registered, then the single variant | |
322 | * NO_VARIANT is returned. If source-target/variant is registered | |
323 | * then that variant is returned. | |
324 | * @param source the source specifiers | |
325 | * @param target the target specifiers | |
326 | * @return the number of registered variant specifiers for a given | |
327 | * source-target pair. | |
328 | */ | |
329 | int32_t countAvailableVariants(const UnicodeString& source, | |
374ca955 A |
330 | const UnicodeString& target) const; |
331 | ||
b75a7d8f A |
332 | /** |
333 | * Return a registered variant specifier for a given source-target | |
334 | * pair. If NO_VARIANT is one of the variants, then it will be | |
335 | * at index 0. | |
336 | * @param index which specifier to return, from 0 to n-1, where | |
337 | * n = countAvailableVariants(source, target) | |
338 | * @param source the source specifier | |
339 | * @param target the target specifier | |
340 | * @param result fill-in parameter to receive the variant | |
341 | * specifier. If source is invalid or if target is invalid or if | |
342 | * index is out of range, result will be empty. | |
343 | * @return reference to result | |
344 | */ | |
345 | UnicodeString& getAvailableVariant(int32_t index, | |
346 | const UnicodeString& source, | |
347 | const UnicodeString& target, | |
374ca955 | 348 | UnicodeString& result) const; |
b75a7d8f A |
349 | |
350 | private: | |
351 | ||
352 | //---------------------------------------------------------------- | |
353 | // Private implementation | |
354 | //---------------------------------------------------------------- | |
355 | ||
356 | Entry* find(const UnicodeString& ID); | |
374ca955 | 357 | |
b75a7d8f A |
358 | Entry* find(UnicodeString& source, |
359 | UnicodeString& target, | |
360 | UnicodeString& variant); | |
361 | ||
362 | Entry* findInDynamicStore(const Spec& src, | |
363 | const Spec& trg, | |
374ca955 | 364 | const UnicodeString& variant) const; |
b75a7d8f A |
365 | |
366 | Entry* findInStaticStore(const Spec& src, | |
367 | const Spec& trg, | |
368 | const UnicodeString& variant); | |
369 | ||
370 | static Entry* findInBundle(const Spec& specToOpen, | |
371 | const Spec& specToFind, | |
372 | const UnicodeString& variant, | |
373 | UTransDirection direction); | |
374 | ||
375 | void registerEntry(const UnicodeString& source, | |
376 | const UnicodeString& target, | |
377 | const UnicodeString& variant, | |
378 | Entry* adopted, | |
379 | UBool visible); | |
380 | ||
381 | void registerEntry(const UnicodeString& ID, | |
382 | Entry* adopted, | |
383 | UBool visible); | |
384 | ||
374ca955 | 385 | void registerEntry(const UnicodeString& ID, |
b75a7d8f A |
386 | const UnicodeString& source, |
387 | const UnicodeString& target, | |
388 | const UnicodeString& variant, | |
389 | Entry* adopted, | |
390 | UBool visible); | |
391 | ||
392 | void registerSTV(const UnicodeString& source, | |
393 | const UnicodeString& target, | |
394 | const UnicodeString& variant); | |
395 | ||
396 | void removeSTV(const UnicodeString& source, | |
397 | const UnicodeString& target, | |
398 | const UnicodeString& variant); | |
399 | ||
400 | Transliterator* instantiateEntry(const UnicodeString& ID, | |
401 | Entry *entry, | |
402 | TransliteratorAlias*& aliasReturn, | |
b75a7d8f A |
403 | UErrorCode& status); |
404 | ||
374ca955 A |
405 | /** |
406 | * A StringEnumeration over the registered IDs in this object. | |
407 | */ | |
408 | class Enumeration : public StringEnumeration { | |
409 | public: | |
410 | Enumeration(const TransliteratorRegistry& reg); | |
411 | virtual ~Enumeration(); | |
412 | virtual int32_t count(UErrorCode& status) const; | |
413 | virtual const UnicodeString* snext(UErrorCode& status); | |
414 | virtual void reset(UErrorCode& status); | |
415 | static UClassID U_EXPORT2 getStaticClassID(); | |
416 | virtual UClassID getDynamicClassID() const; | |
417 | private: | |
418 | int32_t index; | |
419 | const TransliteratorRegistry& reg; | |
420 | }; | |
421 | friend class Enumeration; | |
422 | ||
b75a7d8f A |
423 | private: |
424 | ||
425 | /** | |
426 | * Dynamic registry mapping full IDs to Entry objects. This | |
427 | * contains both public and internal entities. The visibility is | |
428 | * controlled by whether an entry is listed in availableIDs and | |
429 | * specDAG or not. | |
430 | */ | |
431 | Hashtable registry; | |
374ca955 | 432 | |
b75a7d8f A |
433 | /** |
434 | * DAG of visible IDs by spec. Hashtable: source => (Hashtable: | |
435 | * target => (UVector: variant)) The UVector of variants is never | |
436 | * empty. For a source-target with no variant, the special | |
437 | * variant NO_VARIANT (the empty string) is stored in slot zero of | |
438 | * the UVector. | |
439 | */ | |
440 | Hashtable specDAG; | |
374ca955 | 441 | |
b75a7d8f A |
442 | /** |
443 | * Vector of public full IDs. | |
444 | */ | |
445 | UVector availableIDs; | |
446 | ||
447 | TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class | |
448 | TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class | |
449 | }; | |
450 | ||
451 | U_NAMESPACE_END | |
452 | ||
453 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
454 | ||
455 | #endif | |
456 | //eof |