]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ********************************************************************** | |
2ca993e8 | 5 | * Copyright (c) 2001-2016 International Business Machines |
b75a7d8f A |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** | |
8 | * Date Name Description | |
9 | * 08/10/2001 aliu Creation. | |
10 | ********************************************************************** | |
11 | */ | |
12 | #ifndef _TRANSREG_H | |
13 | #define _TRANSREG_H | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_TRANSLITERATION | |
18 | ||
19 | #include "unicode/uobject.h" | |
20 | #include "unicode/translit.h" | |
21 | #include "hash.h" | |
22 | #include "uvector.h" | |
23 | ||
24 | U_NAMESPACE_BEGIN | |
25 | ||
729e4ab9 A |
26 | class TransliteratorEntry; |
27 | class TransliteratorSpec; | |
b75a7d8f A |
28 | class UnicodeString; |
29 | ||
30 | //------------------------------------------------------------------ | |
31 | // TransliteratorAlias | |
32 | //------------------------------------------------------------------ | |
33 | ||
34 | /** | |
35 | * A TransliteratorAlias object is returned by get() if the given ID | |
36 | * actually translates into something else. The caller then invokes | |
37 | * the create() method on the alias to create the actual | |
38 | * transliterator, and deletes the alias. | |
39 | * | |
40 | * Why all the shenanigans? To prevent circular calls between | |
41 | * the registry code and the transliterator code that would deadlock. | |
42 | */ | |
43 | class TransliteratorAlias : public UMemory { | |
44 | public: | |
45 | /** | |
374ca955 | 46 | * Construct a simple alias (type == SIMPLE) |
b75a7d8f A |
47 | * @param aliasID the given id. |
48 | */ | |
73c04bcf | 49 | TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); |
374ca955 | 50 | |
b75a7d8f | 51 | /** |
374ca955 | 52 | * Construct a compound RBT alias (type == COMPOUND). The vector is adopted; compoundFilter is not owned. |
b75a7d8f | 53 | */ |
73c04bcf A |
54 | TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, |
55 | UVector* adoptedTransliterators, | |
b75a7d8f A |
56 | const UnicodeSet* compoundFilter); |
57 | ||
374ca955 A |
58 | /** |
59 | * Construct a rules alias (type == RULES) | |
60 | */ | |
61 | TransliteratorAlias(const UnicodeString& theID, | |
62 | const UnicodeString& rules, | |
63 | UTransDirection dir); | |
64 | ||
b75a7d8f | 65 | ~TransliteratorAlias(); |
374ca955 | 66 | |
b75a7d8f A |
67 | /** |
68 | * The whole point of create() is that the caller must invoke | |
69 | * it when the registry mutex is NOT held, to prevent deadlock. | |
70 | * It may only be called once. | |
374ca955 A |
71 | * |
72 | * Note: Only call create() if isRuleBased() returns FALSE. | |
73 | * | |
74 | * This method must be called *outside* of the TransliteratorRegistry | |
75 | * mutex. | |
b75a7d8f A |
76 | */ |
77 | Transliterator* create(UParseError&, UErrorCode&); | |
374ca955 A |
78 | |
79 | /** | |
80 | * Return TRUE if this alias is rule-based. If so, the caller | |
81 | * must call parse() on it, then call TransliteratorRegistry::reget(). | |
82 | */ | |
83 | UBool isRuleBased() const; | |
84 | ||
85 | /** | |
86 | * If isRuleBased() returns TRUE, then the caller must call this | |
87 | * method, followed by TransliteratorRegistry::reget(). The latter | |
88 | * method must be called inside the TransliteratorRegistry mutex. | |
89 | * | |
90 | * Note: Only call parse() if isRuleBased() returns TRUE. | |
91 | * | |
92 | * This method must be called *outside* of the TransliteratorRegistry | |
93 | * mutex, because it can instantiate Transliterators embedded in | |
94 | * the rules via the "&Latin-Arabic()" syntax. | |
95 | */ | |
96 | void parse(TransliteratorParser& parser, | |
97 | UParseError& pe, UErrorCode& ec) const; | |
98 | ||
b75a7d8f | 99 | private: |
374ca955 | 100 | // We actually come in three flavors (NOTE(review): field names below follow the declared members; confirm against the implementation): |
b75a7d8f A |
101 | // 1. Simple alias |
102 | // Here aliasesOrRules is the alias string. Apart from compoundFilter | |
103 | // (accepted by the SIMPLE constructor), everything else is null, zero, empty. | |
104 | // 2. CompoundRBT | |
105 | // Here ID is the ID, aliasesOrRules is the idBlocks string, and | |
106 | // transes is the vector of adopted transliterators, which is | |
107 | // owned by this object. compoundFilter is the | |
108 | // compound filter, and it is _not_ owned. | |
374ca955 A |
109 | // 3. Rules |
110 | // Here ID is the ID, aliasesOrRules is the rules string. | |
111 | // direction is the UTransDirection. | |
b75a7d8f | 112 | UnicodeString ID; |
73c04bcf A |
113 | UnicodeString aliasesOrRules; |
114 | UVector* transes; // owned | |
b75a7d8f | 115 | const UnicodeSet* compoundFilter; // alias (not owned) |
73c04bcf | 116 | UTransDirection direction; |
374ca955 | 117 | enum { SIMPLE, COMPOUND, RULES } type; |
b75a7d8f A |
118 | |
119 | TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class | |
120 | TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class | |
121 | }; | |
122 | ||
123 | ||
124 | /** | |
125 | * A registry of system transliterators. This is the data structure | |
126 | * that implements the mapping between transliterator IDs and the data | |
127 | * or function pointers used to create the corresponding | |
128 | * transliterators. There is one instance of the registry that is | |
129 | * created statically. | |
130 | * | |
131 | * The registry consists of a dynamic component -- a hashtable -- and | |
132 | * a static component -- locale resource bundles. The dynamic store | |
133 | * is semantically overlaid on the static store, so the static mapping | |
134 | * can be dynamically overridden. | |
135 | * | |
136 | * This is an internal class that is only used by Transliterator. | |
137 | * Transliterator maintains one static instance of this class and | |
138 | * delegates all registry-related operations to it. | |
139 | * | |
140 | * @author Alan Liu | |
141 | */ | |
142 | class TransliteratorRegistry : public UMemory { | |
143 | ||
144 | public: | |
145 | ||
146 | /** | |
147 | * Constructor | |
148 | * @param status Output param set to success/failure code. | |
149 | */ | |
150 | TransliteratorRegistry(UErrorCode& status); | |
151 | ||
152 | /** | |
153 | * Nonvirtual destructor -- this class is not subclassable. | |
154 | */ | |
155 | ~TransliteratorRegistry(); | |
156 | ||
157 | //------------------------------------------------------------------ | |
158 | // Basic public API | |
159 | //------------------------------------------------------------------ | |
160 | ||
161 | /** | |
162 | * Given a simple ID (forward direction, no inline filter, not | |
163 | * compound) attempt to instantiate it from the registry. Return | |
164 | * 0 on failure. | |
165 | * | |
166 | * Return a non-NULL aliasReturn value if the ID points to an alias. | |
167 | * We cannot instantiate it ourselves because the alias may contain | |
168 | * filters or compounds, which we do not understand. Caller should | |
169 | * make aliasReturn NULL before calling. | |
170 | * @param ID the given ID | |
374ca955 A |
171 | * @param aliasReturn output param to receive TransliteratorAlias; |
172 | * should be NULL on entry | |
173 | * @param status Output param set to success/failure code. | |
b75a7d8f A |
174 | * @return the transliterator instantiated from the registry, |
175 | * or 0 on failure. | |
176 | */ | |
177 | Transliterator* get(const UnicodeString& ID, | |
178 | TransliteratorAlias*& aliasReturn, | |
b75a7d8f A |
179 | UErrorCode& status); |
180 | ||
374ca955 A |
181 | /** |
182 | * The caller must call this after calling get(), if [a] calling get() | |
183 | * returns an alias, and [b] the alias is rule based. In that | |
184 | * situation the caller must call alias->parse() to do the parsing | |
185 | * OUTSIDE THE REGISTRY MUTEX, then call this method to retry | |
186 | * instantiating the transliterator. | |
187 | * | |
188 | * Note: Another alias might be returned by this method. | |
189 | * | |
190 | * This method (like all public methods of this class) must be called | |
191 | * from within the TransliteratorRegistry mutex. | |
192 | * | |
193 | * @param aliasReturn output param to receive TransliteratorAlias; | |
194 | * should be NULL on entry | |
195 | */ | |
196 | Transliterator* reget(const UnicodeString& ID, | |
197 | TransliteratorParser& parser, | |
198 | TransliteratorAlias*& aliasReturn, | |
199 | UErrorCode& status); | |
200 | ||
b75a7d8f A |
201 | /** |
202 | * Register a prototype (adopted). This adds an entry to the | |
203 | * dynamic store, or replaces an existing entry. Any entry in the | |
204 | * underlying static locale resource store is masked. | |
205 | */ | |
206 | void put(Transliterator* adoptedProto, | |
46f4442e A |
207 | UBool visible, |
208 | UErrorCode& ec); | |
b75a7d8f A |
209 | |
210 | /** | |
211 | * Register an ID and a factory function pointer. This adds an | |
212 | * entry to the dynamic store, or replaces an existing entry. Any | |
213 | * entry in the underlying static locale resource store is masked. | |
214 | */ | |
215 | void put(const UnicodeString& ID, | |
216 | Transliterator::Factory factory, | |
217 | Transliterator::Token context, | |
46f4442e A |
218 | UBool visible, |
219 | UErrorCode& ec); | |
b75a7d8f A |
220 | |
221 | /** | |
222 | * Register an ID and a resource name. This adds an entry to the | |
223 | * dynamic store, or replaces an existing entry. Any entry in the | |
224 | * underlying static locale resource store is masked. | |
225 | */ | |
226 | void put(const UnicodeString& ID, | |
227 | const UnicodeString& resourceName, | |
228 | UTransDirection dir, | |
73c04bcf | 229 | UBool readonlyResourceAlias, |
46f4442e A |
230 | UBool visible, |
231 | UErrorCode& ec); | |
b75a7d8f A |
232 | |
233 | /** | |
234 | * Register an ID and an alias ID. This adds an entry to the | |
235 | * dynamic store, or replaces an existing entry. Any entry in the | |
236 | * underlying static locale resource store is masked. | |
237 | */ | |
238 | void put(const UnicodeString& ID, | |
239 | const UnicodeString& alias, | |
73c04bcf | 240 | UBool readonlyAliasAlias, |
46f4442e A |
241 | UBool visible, |
242 | UErrorCode& ec); | |
b75a7d8f A |
243 | |
244 | /** | |
245 | * Unregister an ID. This removes an entry from the dynamic store | |
246 | * if there is one. The static locale resource store is | |
247 | * unaffected. | |
248 | * @param ID the given ID. | |
249 | */ | |
250 | void remove(const UnicodeString& ID); | |
251 | ||
252 | //------------------------------------------------------------------ | |
253 | // Public ID and spec management | |
254 | //------------------------------------------------------------------ | |
255 | ||
256 | /** | |
374ca955 A |
257 | * Return a StringEnumeration over the IDs currently registered |
258 | * with the system. | |
259 | * @internal | |
260 | */ | |
261 | StringEnumeration* getAvailableIDs() const; | |
262 | ||
263 | /** | |
264 | * == OBSOLETE - remove in ICU 3.4 == | |
b75a7d8f A |
265 | * Return the number of IDs currently registered with the system. |
266 | * To retrieve the actual IDs, call getAvailableID(i) with | |
267 | * i from 0 to countAvailableIDs() - 1. | |
268 | * @return the number of IDs currently registered with the system. | |
269 | * @internal | |
270 | */ | |
374ca955 | 271 | int32_t countAvailableIDs(void) const; |
b75a7d8f A |
272 | |
273 | /** | |
374ca955 | 274 | * == OBSOLETE - remove in ICU 3.4 == |
b75a7d8f A |
275 | * Return the index-th available ID. index must be between 0 |
276 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
277 | * range, the result of getAvailableID(0) is returned. | |
278 | * @param index the given index. | |
279 | * @return the index-th available ID. index must be between 0 | |
280 | * and countAvailableIDs() - 1, inclusive. If index is out of | |
281 | * range, the result of getAvailableID(0) is returned. | |
282 | * @internal | |
283 | */ | |
374ca955 | 284 | const UnicodeString& getAvailableID(int32_t index) const; |
b75a7d8f A |
285 | |
286 | /** | |
287 | * Return the number of registered source specifiers. | |
288 | * @return the number of registered source specifiers. | |
289 | */ | |
374ca955 A |
290 | int32_t countAvailableSources(void) const; |
291 | ||
b75a7d8f A |
292 | /** |
293 | * Return a registered source specifier. | |
294 | * @param index which specifier to return, from 0 to n-1, where | |
295 | * n = countAvailableSources() | |
296 | * @param result fill-in parameter to receive the source specifier. | |
297 | * If index is out of range, result will be empty. | |
298 | * @return reference to result | |
299 | */ | |
300 | UnicodeString& getAvailableSource(int32_t index, | |
374ca955 A |
301 | UnicodeString& result) const; |
302 | ||
b75a7d8f A |
303 | /** |
304 | * Return the number of registered target specifiers for a given | |
305 | * source specifier. | |
306 | * @param source the given source specifier. | |
307 | * @return the number of registered target specifiers for a given | |
308 | * source specifier. | |
309 | */ | |
374ca955 A |
310 | int32_t countAvailableTargets(const UnicodeString& source) const; |
311 | ||
b75a7d8f A |
312 | /** |
313 | * Return a registered target specifier for a given source. | |
314 | * @param index which specifier to return, from 0 to n-1, where | |
315 | * n = countAvailableTargets(source) | |
316 | * @param source the source specifier | |
317 | * @param result fill-in parameter to receive the target specifier. | |
318 | * If source is invalid or if index is out of range, result will | |
319 | * be empty. | |
320 | * @return reference to result | |
321 | */ | |
322 | UnicodeString& getAvailableTarget(int32_t index, | |
323 | const UnicodeString& source, | |
374ca955 A |
324 | UnicodeString& result) const; |
325 | ||
b75a7d8f A |
326 | /** |
327 | * Return the number of registered variant specifiers for a given | |
328 | * source-target pair. There is always at least one variant: If | |
329 | * just source-target is registered, then the single variant | |
330 | * NO_VARIANT is returned. If source-target/variant is registered | |
331 | * then that variant is returned. | |
332 | * @param source the source specifiers | |
333 | * @param target the target specifiers | |
334 | * @return the number of registered variant specifiers for a given | |
335 | * source-target pair. | |
336 | */ | |
337 | int32_t countAvailableVariants(const UnicodeString& source, | |
374ca955 A |
338 | const UnicodeString& target) const; |
339 | ||
b75a7d8f A |
340 | /** |
341 | * Return a registered variant specifier for a given source-target | |
342 | * pair. If NO_VARIANT is one of the variants, then it will be | |
343 | * at index 0. | |
344 | * @param index which specifier to return, from 0 to n-1, where | |
345 | * n = countAvailableVariants(source, target) | |
346 | * @param source the source specifier | |
347 | * @param target the target specifier | |
348 | * @param result fill-in parameter to receive the variant | |
349 | * specifier. If source is invalid or if target is invalid or if | |
350 | * index is out of range, result will be empty. | |
351 | * @return reference to result | |
352 | */ | |
353 | UnicodeString& getAvailableVariant(int32_t index, | |
354 | const UnicodeString& source, | |
355 | const UnicodeString& target, | |
374ca955 | 356 | UnicodeString& result) const; |
b75a7d8f A |
357 | |
358 | private: | |
359 | ||
360 | //---------------------------------------------------------------- | |
361 | // Private implementation | |
362 | //---------------------------------------------------------------- | |
363 | ||
729e4ab9 | 364 | TransliteratorEntry* find(const UnicodeString& ID); |
374ca955 | 365 | |
729e4ab9 | 366 | TransliteratorEntry* find(UnicodeString& source, |
b75a7d8f A |
367 | UnicodeString& target, |
368 | UnicodeString& variant); | |
369 | ||
729e4ab9 A |
370 | TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, |
371 | const TransliteratorSpec& trg, | |
374ca955 | 372 | const UnicodeString& variant) const; |
b75a7d8f | 373 | |
729e4ab9 A |
374 | TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, |
375 | const TransliteratorSpec& trg, | |
b75a7d8f A |
376 | const UnicodeString& variant); |
377 | ||
729e4ab9 A |
378 | static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, |
379 | const TransliteratorSpec& specToFind, | |
b75a7d8f A |
380 | const UnicodeString& variant, |
381 | UTransDirection direction); | |
382 | ||
383 | void registerEntry(const UnicodeString& source, | |
384 | const UnicodeString& target, | |
385 | const UnicodeString& variant, | |
729e4ab9 | 386 | TransliteratorEntry* adopted, |
b75a7d8f A |
387 | UBool visible); |
388 | ||
389 | void registerEntry(const UnicodeString& ID, | |
729e4ab9 | 390 | TransliteratorEntry* adopted, |
b75a7d8f A |
391 | UBool visible); |
392 | ||
374ca955 | 393 | void registerEntry(const UnicodeString& ID, |
b75a7d8f A |
394 | const UnicodeString& source, |
395 | const UnicodeString& target, | |
396 | const UnicodeString& variant, | |
729e4ab9 | 397 | TransliteratorEntry* adopted, |
b75a7d8f A |
398 | UBool visible); |
399 | ||
400 | void registerSTV(const UnicodeString& source, | |
401 | const UnicodeString& target, | |
402 | const UnicodeString& variant); | |
403 | ||
404 | void removeSTV(const UnicodeString& source, | |
405 | const UnicodeString& target, | |
406 | const UnicodeString& variant); | |
407 | ||
408 | Transliterator* instantiateEntry(const UnicodeString& ID, | |
729e4ab9 | 409 | TransliteratorEntry *entry, |
b75a7d8f | 410 | TransliteratorAlias*& aliasReturn, |
b75a7d8f A |
411 | UErrorCode& status); |
412 | ||
374ca955 A |
413 | /** |
414 | * A StringEnumeration over the registered IDs in this object. | |
415 | */ | |
416 | class Enumeration : public StringEnumeration { | |
417 | public: | |
418 | Enumeration(const TransliteratorRegistry& reg); | |
419 | virtual ~Enumeration(); | |
420 | virtual int32_t count(UErrorCode& status) const; | |
421 | virtual const UnicodeString* snext(UErrorCode& status); | |
422 | virtual void reset(UErrorCode& status); | |
423 | static UClassID U_EXPORT2 getStaticClassID(); | |
424 | virtual UClassID getDynamicClassID() const; | |
425 | private: | |
426 | int32_t index; | |
427 | const TransliteratorRegistry& reg; | |
428 | }; | |
429 | friend class Enumeration; | |
430 | ||
b75a7d8f A |
431 | private: |
432 | ||
433 | /** | |
434 | * Dynamic registry mapping full IDs to Entry objects. This | |
435 | * contains both public and internal entities. The visibility is | |
436 | * controlled by whether an entry is listed in availableIDs and | |
437 | * specDAG or not. | |
438 | */ | |
439 | Hashtable registry; | |
374ca955 | 440 | |
b75a7d8f A |
441 | /** |
442 | * DAG of visible IDs by spec. Hashtable: source => (Hashtable: | |
2ca993e8 | 443 | * target => variant bitmask) |
b75a7d8f A |
444 | */ |
445 | Hashtable specDAG; | |
374ca955 | 446 | |
2ca993e8 A |
447 | /** |
448 | * Vector of all variant names | |
449 | */ | |
450 | UVector variantList; | |
451 | ||
b75a7d8f A |
452 | /** |
453 | * Vector of public full IDs. | |
454 | */ | |
455 | UVector availableIDs; | |
456 | ||
457 | TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class | |
458 | TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class | |
459 | }; | |
460 | ||
461 | U_NAMESPACE_END | |
462 | ||
b331163b A |
463 | U_CFUNC UBool utrans_transliterator_cleanup(void); |
464 | ||
b75a7d8f A |
465 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
466 | ||
467 | #endif | |
468 | //eof |