]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
46f4442e A |
2 | ********************************************************************** |
3 | * Copyright (C) 1999-2007, International Business Machines Corporation | |
4 | * and others. All Rights Reserved. | |
b75a7d8f A |
5 | ********************************************************************** |
6 | * Date Name Description | |
7 | * 11/17/99 aliu Creation. | |
8 | ********************************************************************** | |
9 | */ | |
10 | #ifndef RBT_SET_H | |
11 | #define RBT_SET_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_TRANSLITERATION | |
16 | ||
17 | #include "unicode/uobject.h" | |
18 | #include "unicode/utrans.h" | |
19 | #include "uvector.h" | |
20 | ||
21 | U_NAMESPACE_BEGIN | |
22 | ||
23 | class Replaceable; | |
24 | class TransliterationRule; | |
25 | class TransliterationRuleData; | |
26 | class UnicodeFilter; | |
27 | class UnicodeString; | |
28 | class UnicodeSet; | |
29 | ||
30 | /** | |
31 | * A set of rules for a <code>RuleBasedTransliterator</code>. | |
32 | * @author Alan Liu | |
33 | */ | |
46f4442e | 34 | class TransliterationRuleSet : public UMemory {
b75a7d8f A |
35 | /**
36 | * Vector of rules, in the order added. This is used while the | |
37 | * rule set is getting built. After that, freeze() reorders and | |
38 | * indexes the rules into rules[]. Any given rule is stored once | |
39 | * in ruleVector, and one or more times in rules[]. ruleVector | |
40 | * owns and deletes the rules. | |
41 | */ | |
42 | UVector* ruleVector; | |
43 | ||
44 | /** | |
45 | * Sorted and indexed table of rules. This is created by freeze() | |
46 | * from the rules in ruleVector. It contains alias pointers to | |
47 | * the rules in ruleVector. It is zero before freeze() is called | |
48 | * and non-zero thereafter. | |
49 | */ | |
50 | TransliterationRule** rules; | |
51 | ||
52 | /** | |
53 | * Index table. For text having a first character c, compute x = c&0xFF. | |
54 | * Now use rules[index[x]..index[x+1]-1]. This index table is created by | |
55 | * freeze(). Before freeze() is called it contains garbage. | |
56 | */ | |
57 | int32_t index[257]; | |
58 | ||
59 | /** | |
60 | * Length of the longest preceding context | |
61 | */ | |
62 | int32_t maxContextLength; | |
63 | ||
64 | public: | |
65 | ||
66 | /** | |
67 | * Construct a new empty rule set. | |
68 | * @param status Output parameter filled in with success or failure status. | |
69 | */ | |
70 | TransliterationRuleSet(UErrorCode& status); | |
71 | ||
72 | /** | |
73 | * Copy constructor. | |
74 | */ | |
75 | TransliterationRuleSet(const TransliterationRuleSet&); | |
76 | ||
77 | /** | |
78 | * Destructor. | |
79 | */ | |
80 | virtual ~TransliterationRuleSet(); | |
81 | ||
82 | /** | |
83 | * Change the data object that this rule belongs to. Used | |
84 | * internally by the TransliterationRuleData copy constructor. | |
85 | * @param data the new data value to be set. | |
86 | */ | |
87 | void setData(const TransliterationRuleData* data); | |
88 | ||
89 | /** | |
90 | * Return the maximum context length. | |
91 | * @return the length of the longest preceding context. | |
92 | */ | |
93 | virtual int32_t getMaximumContextLength(void) const; | |
94 | ||
95 | /** | |
96 | * Add a rule to this set. Rules are added in order, and order is | |
97 | * significant. The last call to this method must be followed by | |
98 | * a call to <code>freeze()</code> before the rule set is used. | |
99 | * This method must <em>not</em> be called after freeze() has been | |
100 | * called. | |
101 | * @param adoptedRule the rule to add; it is stored in ruleVector, which owns and deletes the rules | |
102 | * @param status Output parameter filled in with success or failure status. | |
103 | */ | |
104 | virtual void addRule(TransliterationRule* adoptedRule, | |
105 | UErrorCode& status); | |
106 | ||
107 | /** | |
108 | * Check this for masked rules and index it to optimize performance. | |
109 | * The sequence of operations is: (1) add rules to a set using | |
110 | * <code>addRule()</code>; (2) freeze the set using | |
111 | * <code>freeze()</code>; (3) use the rule set. If | |
112 | * <code>addRule()</code> is called after calling this method, it | |
113 | * invalidates this object, and this method must be called again. | |
114 | * That is, <code>freeze()</code> may be called multiple times, | |
115 | * although for optimal performance it shouldn't be. | |
116 | * @param parseError A reference to a UParseError that receives information about any | |
117 | * errors that occurred. | |
118 | * @param status Output parameter filled in with success or failure status. | |
119 | */ | |
120 | virtual void freeze(UParseError& parseError, UErrorCode& status); | |
121 | ||
122 | /** | |
123 | * Transliterate the given text with the given UTransPosition | |
124 | * indices. Return TRUE if the transliteration should continue | |
125 | * or FALSE if it should halt (because of a U_PARTIAL_MATCH match). | |
126 | * Note that FALSE is only ever returned if isIncremental is TRUE. | |
127 | * @param text the text to be transliterated | |
128 | * @param index the position indices, which will be updated | |
129 | * @param isIncremental if TRUE, assume new text may be inserted | |
130 | * at index.limit, and return FALSE if there is a partial match. | |
131 | * @return TRUE unless a U_PARTIAL_MATCH has been obtained, | |
132 | * indicating that transliteration should stop until more text | |
133 | * arrives. | |
134 | */ | |
135 | UBool transliterate(Replaceable& text, | |
136 | UTransPosition& index, | |
137 | UBool isIncremental); | |
138 | ||
139 | /** | |
140 | * Create rule strings that represents this rule set. | |
141 | * @param result string to receive the rule strings. Current | |
142 | * contents will be deleted. | |
143 | * @param escapeUnprintable if TRUE, unprintable characters are escaped | |
144 | * @return A reference to 'result'. | |
145 | */ | |
146 | virtual UnicodeString& toRules(UnicodeString& result, | |
147 | UBool escapeUnprintable) const; | |
148 | ||
149 | /** | |
150 | * Return the set of all characters that may be modified | |
151 | * (getTarget=false) or emitted (getTarget=true) by this set. | |
152 | */ | |
153 | UnicodeSet& getSourceTargetSet(UnicodeSet& result, | |
374ca955 | 154 | UBool getTarget) const;
b75a7d8f A |
155 | 
156 | private: | |
157 | ||
158 | TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid assignment of this class (copy construction stays public) | |
159 | }; | |
160 | ||
161 | U_NAMESPACE_END | |
162 | ||
163 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ | |
164 | ||
165 | #endif |