]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ********************************************************************** | |
73c04bcf | 3 | * Copyright (c) 2001-2006, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * Date Name Description | |
7 | * 08/10/2001 aliu Creation. | |
8 | ********************************************************************** | |
9 | */ | |
10 | #ifndef _TRANSREG_H | |
11 | #define _TRANSREG_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_TRANSLITERATION | |
16 | ||
17 | #include "unicode/uobject.h" | |
18 | #include "unicode/translit.h" | |
19 | #include "hash.h" | |
20 | #include "uvector.h" | |
21 | ||
22 | U_NAMESPACE_BEGIN | |
23 | ||
24 | class Entry; | |
25 | class Spec; | |
26 | class UnicodeString; | |
27 | ||
28 | //------------------------------------------------------------------ | |
29 | // TransliteratorAlias | |
30 | //------------------------------------------------------------------ | |
31 | ||
32 | /** | |
33 | * A TransliteratorAlias object is returned by get() if the given ID | |
34 | * actually translates into something else. The caller then invokes | |
35 | * the create() method on the alias to create the actual | |
36 | * transliterator, and deletes the alias. | |
37 | * | |
38 | * Why all the shenanigans? To prevent circular calls between | |
39 | * the registry code and the transliterator code, which would deadlock. | |
40 | */ | |
41 | class TransliteratorAlias : public UMemory { | |
42 | public: | |
43 | /** | |
374ca955 | 44 | * Construct a simple alias (type == SIMPLE)
b75a7d8f A |
45 | * @param aliasID the given id. |
46 | */ | |
73c04bcf | 47 | TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
374ca955 | 48 |
b75a7d8f | 49 | /**
374ca955 | 50 | * Construct a compound RBT alias (type == COMPOUND)
b75a7d8f | 51 | */
73c04bcf A |
52 | TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
53 | UVector* adoptedTransliterators, | |
b75a7d8f A |
54 | const UnicodeSet* compoundFilter); |
55 | ||
374ca955 A |
56 | /** |
57 | * Construct a rules alias (type == RULES) | |
58 | */ | |
59 | TransliteratorAlias(const UnicodeString& theID, | |
60 | const UnicodeString& rules, | |
61 | UTransDirection dir); | |
62 | ||
b75a7d8f | 63 | ~TransliteratorAlias();
374ca955 | 64 |
b75a7d8f A |
65 | /** |
66 | * The whole point of create() is that the caller must invoke | |
67 | * it when the registry mutex is NOT held, to prevent deadlock. | |
68 | * It may only be called once. | |
374ca955 A |
69 | * |
70 | * Note: Only call create() if isRuleBased() returns FALSE. | |
71 | * | |
72 | * This method must be called *outside* of the TransliteratorRegistry | |
73 | * mutex. | |
b75a7d8f A |
74 | */ |
75 | Transliterator* create(UParseError&, UErrorCode&); | |
374ca955 A |
76 | |
77 | /** | |
78 | * Return TRUE if this alias is rule-based. If so, the caller | |
79 | * must call parse() on it, then call TransliteratorRegistry::reget(). | |
80 | */ | |
81 | UBool isRuleBased() const; | |
82 | ||
83 | /** | |
84 | * If isRuleBased() returns TRUE, then the caller must call this | |
85 | * method, followed by TransliteratorRegistry::reget(). The latter | |
86 | * method must be called inside the TransliteratorRegistry mutex. | |
87 | * | |
88 | * Note: Only call parse() if isRuleBased() returns TRUE. | |
89 | * | |
90 | * This method must be called *outside* of the TransliteratorRegistry | |
91 | * mutex, because it can instantiate Transliterators embedded in | |
92 | * the rules via the "&Latin-Arabic()" syntax. | |
93 | */ | |
94 | void parse(TransliteratorParser& parser, | |
95 | UParseError& pe, UErrorCode& ec) const; | |
96 | ||
b75a7d8f | 97 | private:
374ca955 | 98 | // We actually come in three flavors:
b75a7d8f A |
99 | // 1. Simple alias |
100 | // Here aliasesOrRules (formerly aliasID -- TODO confirm) is the alias | |
101 | // string. Everything else is null, zero, empty. | |
102 | // 2. CompoundRBT | |
103 | // Here ID is the ID, aliasesOrRules is presumably the idBlocks, and | |
104 | // transes holds the adopted transliterators (TODO confirm; this comment | |
105 | // originally named aliasID, trans and idSplitPoint). compoundFilter is the | |
106 | // compound filter, and it is _not_ owned. | |
374ca955 A |
107 | // 3. Rules |
108 | // Here ID is the ID, aliasesOrRules is presumably the rules string. | |
109 | // direction is the UTransDirection. | |
b75a7d8f | 110 | UnicodeString ID;
73c04bcf A |
111 | UnicodeString aliasesOrRules; |
112 | UVector* transes; // owned | |
b75a7d8f | 113 | const UnicodeSet* compoundFilter; // alias
73c04bcf | 114 | UTransDirection direction;
374ca955 | 115 | enum { SIMPLE, COMPOUND, RULES } type;
b75a7d8f A |
116 | |
117 | TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class | |
118 | TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class | |
119 | }; | |
120 | ||
121 | ||
122 | /** | |
123 | * A registry of system transliterators. This is the data structure | |
124 | * that implements the mapping between transliterator IDs and the data | |
125 | * or function pointers used to create the corresponding | |
126 | * transliterators. There is one instance of the registry that is | |
127 | * created statically. | |
128 | * | |
129 | * The registry consists of a dynamic component -- a hashtable -- and | |
130 | * a static component -- locale resource bundles. The dynamic store | |
131 | * is semantically overlaid on the static store, so the static mapping | |
132 | * can be dynamically overridden. | |
133 | * | |
134 | * This is an internal class that is only used by Transliterator. | |
135 | * Transliterator maintains one static instance of this class and | |
136 | * delegates all registry-related operations to it. | |
137 | * | |
138 | * @author Alan Liu | |
139 | */ | |
140 | class TransliteratorRegistry : public UMemory { | |
141 | ||
142 | public: | |
143 | ||
144 | /** | |
145 | * Constructor | |
146 | * @param status Output param set to success/failure code. | |
147 | */ | |
148 | TransliteratorRegistry(UErrorCode& status); | |
149 | ||
150 | /** | |
151 | * Nonvirtual destructor -- this class is not subclassable. | |
152 | */ | |
153 | ~TransliteratorRegistry(); | |
154 | ||
155 | //------------------------------------------------------------------ | |
156 | // Basic public API | |
157 | //------------------------------------------------------------------ | |
158 | ||
159 | /** | |
160 | * Given a simple ID (forward direction, no inline filter, not | |
161 | * compound) attempt to instantiate it from the registry. Return | |
162 | * 0 on failure. | |
163 | * | |
164 | * Return a non-NULL aliasReturn value if the ID points to an alias. | |
165 | * We cannot instantiate it ourselves because the alias may contain | |
166 | * filters or compounds, which we do not understand. Caller should | |
167 | * make aliasReturn NULL before calling. | |
168 | * @param ID the given ID | |
374ca955 A |
169 | * @param aliasReturn output param to receive TransliteratorAlias; |
170 | * should be NULL on entry | |
171 | * (No parseError parameter here; a UParseError is taken by | |
b75a7d8f A |
172 | * TransliteratorAlias::create() and parse() instead.) |
173 | * @param status Output param set to success/failure code. | |
174 | */ | |
175 | Transliterator* get(const UnicodeString& ID, | |
176 | TransliteratorAlias*& aliasReturn, | |
b75a7d8f A |
177 | UErrorCode& status); |
178 | ||
374ca955 A |
179 | /** |
180 | * The caller must call this after calling get(), if [a] calling get() | |
181 | * returns an alias, and [b] the alias is rule based. In that | |
182 | * situation the caller must call alias->parse() to do the parsing | |
183 | * OUTSIDE THE REGISTRY MUTEX, then call this method to retry | |
184 | * instantiating the transliterator. | |
185 | * | |
186 | * Note: Another alias might be returned by this method. | |
187 | * | |
188 | * This method (like all public methods of this class) must be called | |
189 | * from within the TransliteratorRegistry mutex. | |
190 | * | |
191 | * @param aliasReturn output param to receive TransliteratorAlias; | |
192 | * should be NULL on entry | |
193 | */ | |
194 | Transliterator* reget(const UnicodeString& ID, | |
195 | TransliteratorParser& parser, | |
196 | TransliteratorAlias*& aliasReturn, | |
197 | UErrorCode& status); | |
198 | ||
b75a7d8f A |
199 | /** |
200 | * Register a prototype (adopted). This adds an entry to the | |
201 | * dynamic store, or replaces an existing entry. Any entry in the | |
202 | * underlying static locale resource store is masked. | |
203 | */ | |
204 | void put(Transliterator* adoptedProto, | |
205 | UBool visible); | |
206 | ||
207 | /** | |
208 | * Register an ID and a factory function pointer. This adds an | |
209 | * entry to the dynamic store, or replaces an existing entry. Any | |
210 | * entry in the underlying static locale resource store is masked. | |
211 | */ | |
212 | void put(const UnicodeString& ID, | |
213 | Transliterator::Factory factory, | |
214 | Transliterator::Token context, | |
215 | UBool visible); | |
216 | ||
217 | /** | |
218 | * Register an ID and a resource name. This adds an entry to the | |
219 | * dynamic store, or replaces an existing entry. Any entry in the | |
220 | * underlying static locale resource store is masked. | |
221 | */ | |
222 | void put(const UnicodeString& ID, | |
223 | const UnicodeString& resourceName, | |
224 | UTransDirection dir, | |
73c04bcf | 225 | UBool readonlyResourceAlias,
b75a7d8f A |
226 | UBool visible); |
227 | ||
228 | /** | |
229 | * Register an ID and an alias ID. This adds an entry to the | |
230 | * dynamic store, or replaces an existing entry. Any entry in the | |
231 | * underlying static locale resource store is masked. | |
232 | */ | |
233 | void put(const UnicodeString& ID, | |
234 | const UnicodeString& alias, | |
73c04bcf | 235 | UBool readonlyAliasAlias,
b75a7d8f A |
236 | UBool visible); |
237 | ||
238 | /** | |
239 | * Unregister an ID. This removes an entry from the dynamic store | |
240 | * if there is one. The static locale resource store is | |
241 | * unaffected. | |
242 | * @param ID the given ID. | |
243 | */ | |
244 | void remove(const UnicodeString& ID); | |
245 | ||
246 | //------------------------------------------------------------------ | |
247 | // Public ID and spec management | |
248 | //------------------------------------------------------------------ | |
249 | ||
250 | /** | |
374ca955 A |
251 | * Return a StringEnumeration over the IDs currently registered |
252 | * with the system. | |
253 | * @internal | |
254 | */ | |
255 | StringEnumeration* getAvailableIDs() const; | |
256 | ||
257 | /** | |
258 | * == OBSOLETE - remove in ICU 3.4 == | |
b75a7d8f A |
259 | * Return the number of IDs currently registered with the system. |
260 | * To retrieve the actual IDs, call getAvailableID(i) with | |
261 | * i from 0 to countAvailableIDs() - 1. | |
262 | * @return the number of IDs currently registered with the system. | |
263 | * @internal | |
264 | */ | |
374ca955 | 265 | int32_t countAvailableIDs(void) const;
b75a7d8f A |
266 | |
267 | /** | |
374ca955 | 268 | * == OBSOLETE - remove in ICU 3.4 ==
b75a7d8f A |
269 | * Return the index-th available ID. index must be between 0 |
270 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
271 | * range, the result of getAvailableID(0) is returned. | |
272 | * @param index the given index. | |
273 | * @return the index-th available ID. index must be between 0 | |
274 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
275 | * range, the result of getAvailableID(0) is returned. | |
276 | * @internal | |
277 | */ | |
374ca955 | 278 | const UnicodeString& getAvailableID(int32_t index) const;
b75a7d8f A |
279 | |
280 | /** | |
281 | * Return the number of registered source specifiers. | |
282 | * @return the number of registered source specifiers. | |
283 | */ | |
374ca955 A |
284 | int32_t countAvailableSources(void) const; |
285 | ||
b75a7d8f A |
286 | /** |
287 | * Return a registered source specifier. | |
288 | * @param index which specifier to return, from 0 to n-1, where | |
289 | * n = countAvailableSources() | |
290 | * @param result fill-in parameter to receive the source specifier. | |
291 | * If index is out of range, result will be empty. | |
292 | * @return reference to result | |
293 | */ | |
294 | UnicodeString& getAvailableSource(int32_t index, | |
374ca955 A |
295 | UnicodeString& result) const; |
296 | ||
b75a7d8f A |
297 | /** |
298 | * Return the number of registered target specifiers for a given | |
299 | * source specifier. | |
300 | * @param source the given source specifier. | |
301 | * @return the number of registered target specifiers for a given | |
302 | * source specifier. | |
303 | */ | |
374ca955 A |
304 | int32_t countAvailableTargets(const UnicodeString& source) const; |
305 | ||
b75a7d8f A |
306 | /** |
307 | * Return a registered target specifier for a given source. | |
308 | * @param index which specifier to return, from 0 to n-1, where | |
309 | * n = countAvailableTargets(source) | |
310 | * @param source the source specifier | |
311 | * @param result fill-in parameter to receive the target specifier. | |
312 | * If source is invalid or if index is out of range, result will | |
313 | * be empty. | |
314 | * @return reference to result | |
315 | */ | |
316 | UnicodeString& getAvailableTarget(int32_t index, | |
317 | const UnicodeString& source, | |
374ca955 A |
318 | UnicodeString& result) const; |
319 | ||
b75a7d8f A |
320 | /** |
321 | * Return the number of registered variant specifiers for a given | |
322 | * source-target pair. There is always at least one variant: If | |
323 | * just source-target is registered, then the single variant | |
324 | * NO_VARIANT is returned. If source-target/variant is registered | |
325 | * then that variant is returned. | |
326 | * @param source the source specifier | |
327 | * @param target the target specifier | |
328 | * @return the number of registered variant specifiers for a given | |
329 | * source-target pair. | |
330 | */ | |
331 | int32_t countAvailableVariants(const UnicodeString& source, | |
374ca955 A |
332 | const UnicodeString& target) const; |
333 | ||
b75a7d8f A |
334 | /** |
335 | * Return a registered variant specifier for a given source-target | |
336 | * pair. If NO_VARIANT is one of the variants, then it will be | |
337 | * at index 0. | |
338 | * @param index which specifier to return, from 0 to n-1, where | |
339 | * n = countAvailableVariants(source, target) | |
340 | * @param source the source specifier | |
341 | * @param target the target specifier | |
342 | * @param result fill-in parameter to receive the variant | |
343 | * specifier. If source is invalid or if target is invalid or if | |
344 | * index is out of range, result will be empty. | |
345 | * @return reference to result | |
346 | */ | |
347 | UnicodeString& getAvailableVariant(int32_t index, | |
348 | const UnicodeString& source, | |
349 | const UnicodeString& target, | |
374ca955 | 350 | UnicodeString& result) const;
b75a7d8f A |
351 | |
352 | private: | |
353 | ||
354 | //---------------------------------------------------------------- | |
355 | // Private implementation | |
356 | //---------------------------------------------------------------- | |
357 | ||
358 | Entry* find(const UnicodeString& ID); | |
374ca955 | 359 |
b75a7d8f A |
360 | Entry* find(UnicodeString& source, |
361 | UnicodeString& target, | |
362 | UnicodeString& variant); | |
363 | ||
364 | Entry* findInDynamicStore(const Spec& src, | |
365 | const Spec& trg, | |
374ca955 | 366 | const UnicodeString& variant) const;
b75a7d8f A |
367 | |
368 | Entry* findInStaticStore(const Spec& src, | |
369 | const Spec& trg, | |
370 | const UnicodeString& variant); | |
371 | ||
372 | static Entry* findInBundle(const Spec& specToOpen, | |
373 | const Spec& specToFind, | |
374 | const UnicodeString& variant, | |
375 | UTransDirection direction); | |
376 | ||
377 | void registerEntry(const UnicodeString& source, | |
378 | const UnicodeString& target, | |
379 | const UnicodeString& variant, | |
380 | Entry* adopted, | |
381 | UBool visible); | |
382 | ||
383 | void registerEntry(const UnicodeString& ID, | |
384 | Entry* adopted, | |
385 | UBool visible); | |
386 | ||
374ca955 | 387 | void registerEntry(const UnicodeString& ID,
b75a7d8f A |
388 | const UnicodeString& source, |
389 | const UnicodeString& target, | |
390 | const UnicodeString& variant, | |
391 | Entry* adopted, | |
392 | UBool visible); | |
393 | ||
394 | void registerSTV(const UnicodeString& source, | |
395 | const UnicodeString& target, | |
396 | const UnicodeString& variant); | |
397 | ||
398 | void removeSTV(const UnicodeString& source, | |
399 | const UnicodeString& target, | |
400 | const UnicodeString& variant); | |
401 | ||
402 | Transliterator* instantiateEntry(const UnicodeString& ID, | |
403 | Entry *entry, | |
404 | TransliteratorAlias*& aliasReturn, | |
b75a7d8f A |
405 | UErrorCode& status); |
406 | ||
374ca955 A |
407 | /** |
408 | * A StringEnumeration over the registered IDs in this object. | |
409 | */ | |
410 | class Enumeration : public StringEnumeration { | |
411 | public: | |
412 | Enumeration(const TransliteratorRegistry& reg); | |
413 | virtual ~Enumeration(); | |
414 | virtual int32_t count(UErrorCode& status) const; | |
415 | virtual const UnicodeString* snext(UErrorCode& status); | |
416 | virtual void reset(UErrorCode& status); | |
417 | static UClassID U_EXPORT2 getStaticClassID(); | |
418 | virtual UClassID getDynamicClassID() const; | |
419 | private: | |
420 | int32_t index; | |
421 | const TransliteratorRegistry& reg; | |
422 | }; | |
423 | friend class Enumeration; | |
424 | ||
b75a7d8f A |
425 | private: |
426 | ||
427 | /** | |
428 | * Dynamic registry mapping full IDs to Entry objects. This | |
429 | * contains both public and internal entities. The visibility is | |
430 | * controlled by whether an entry is listed in availableIDs and | |
431 | * specDAG or not. | |
432 | */ | |
433 | Hashtable registry; | |
374ca955 | 434 |
b75a7d8f A |
435 | /** |
436 | * DAG of visible IDs by spec. Hashtable: source => (Hashtable: | |
437 | * target => (UVector: variant)) The UVector of variants is never | |
438 | * empty. For a source-target with no variant, the special | |
439 | * variant NO_VARIANT (the empty string) is stored in slot zero of | |
440 | * the UVector. | |
441 | */ | |
442 | Hashtable specDAG; | |
374ca955 | 443 |
b75a7d8f A |
444 | /** |
445 | * Vector of public full IDs. | |
446 | */ | |
447 | UVector availableIDs; | |
448 | ||
449 | TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class | |
450 | TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class | |
451 | }; | |
452 | ||
453 | U_NAMESPACE_END | |
454 | ||
455 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
456 | ||
457 | #endif | |
458 | //eof |