]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/rbt_set.h
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / i18n / rbt_set.h
... / ...
CommitLineData
1/*
2**********************************************************************
3* Copyright (C) 1999-2007, International Business Machines Corporation
4* and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 11/17/99 aliu Creation.
8**********************************************************************
9*/
10#ifndef RBT_SET_H
11#define RBT_SET_H
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "unicode/uobject.h"
18#include "unicode/utrans.h"
19#include "uvector.h"
20
21U_NAMESPACE_BEGIN
22
23class Replaceable;
24class TransliterationRule;
25class TransliterationRuleData;
26class UnicodeFilter;
27class UnicodeString;
28class UnicodeSet;
29
30/**
31 * A set of rules for a <code>RuleBasedTransliterator</code>.
 *
 * Life cycle, as described by the member documentation below: rules are
 * added in order with addRule(), the set is then indexed with freeze(),
 * and only after that is it used (for example by transliterate()).
 *
32 * @author Alan Liu
33 */
34class TransliterationRuleSet : public UMemory {
35 /**
36 * Vector of rules, in the order added. This is used while the
37 * rule set is getting built. After that, freeze() reorders and
38 * indexes the rules into rules[]. Any given rule is stored once
39 * in ruleVector, and one or more times in rules[]. ruleVector
40 * owns and deletes the rules.
41 */
42 UVector* ruleVector;
43
44 /**
45 * Sorted and indexed table of rules. This is created by freeze()
46 * from the rules in ruleVector. It contains alias (non-owning)
47 * pointers to the rules in ruleVector. It is zero before freeze()
48 * is called and non-zero thereafter.
49 */
50 TransliterationRule** rules;
51
52 /**
53 * Index table. For text having a first character c, compute x = c&0xFF.
54 * Now use rules[index[x]..index[x+1]-1]. This index table is created by
55 * freeze(). Before freeze() is called it contains garbage.
 * The table has 257 entries rather than 256 so that index[x+1] is
 * still a valid element when x == 0xFF.
56 */
57 int32_t index[257];
58
59 /**
60 * Length of the longest preceding context.
 * Reported to callers by getMaximumContextLength().
61 */
62 int32_t maxContextLength;
63
64public:
65
66 /**
67 * Construct a new empty rule set.
68 * @param status Output parameter filled in with success or failure status.
69 */
70 TransliterationRuleSet(UErrorCode& status);
71
72 /**
73 * Copy constructor.
 * NOTE(review): whether the rules themselves are cloned or shared is
 * not visible in this header -- confirm against the implementation.
74 */
75 TransliterationRuleSet(const TransliterationRuleSet&);
76
77 /**
78 * Destructor.
79 */
80 virtual ~TransliterationRuleSet();
81
82 /**
83 * Change the data object that this rule belongs to. Used
84 * internally by the TransliterationRuleData copy constructor.
85 * @param data the new data value to be set.
86 */
87 void setData(const TransliterationRuleData* data);
88
89 /**
90 * Return the maximum context length.
91 * @return the length of the longest preceding context.
92 */
93 virtual int32_t getMaximumContextLength(void) const;
94
95 /**
96 * Add a rule to this set. Rules are added in order, and order is
97 * significant. The last call to this method must be followed by
98 * a call to <code>freeze()</code> before the rule set is used.
99 * This method must <em>not</em> be called after freeze() has been
100 * called.
 *
 * NOTE(review): the documentation of freeze() states that addRule() may
 * be called after freeze() as long as freeze() is then called again,
 * which disagrees with the restriction above -- confirm against the
 * implementation which statement is current.
101 *
102 * @param adoptedRule the rule to add. The "adopted" name, together with
 * the ruleVector documentation, indicates that this set takes ownership
 * of the rule.
 * @param status Output parameter filled in with success or failure status.
103 */
104 virtual void addRule(TransliterationRule* adoptedRule,
105 UErrorCode& status);
106
107 /**
108 * Check this rule set for masked rules and index it to optimize
 * performance.
109 * The sequence of operations is: (1) add rules to a set using
110 * <code>addRule()</code>; (2) freeze the set using
111 * <code>freeze()</code>; (3) use the rule set. If
112 * <code>addRule()</code> is called after calling this method, it
113 * invalidates this object, and this method must be called again.
114 * That is, <code>freeze()</code> may be called multiple times,
115 * although for optimal performance it shouldn't be.
116 * @param parseError A reference to a UParseError that receives
117 * information about any errors that occurred.
118 * @param status Output parameter filled in with success or failure status.
119 */
120 virtual void freeze(UParseError& parseError, UErrorCode& status);
121
122 /**
123 * Transliterate the given text with the given UTransPosition
124 * indices. Return TRUE if the transliteration should continue
125 * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
126 * Note that FALSE is only ever returned if isIncremental is TRUE.
127 * @param text the text to be transliterated
128 * @param index the position indices, which will be updated
129 * @param isIncremental if TRUE, assume new text may be inserted
130 * at index.limit, and return FALSE if there is a partial match.
131 * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
132 * indicating that transliteration should stop until more text
133 * arrives.
134 */
135 UBool transliterate(Replaceable& text,
136 UTransPosition& index,
137 UBool isIncremental);
138
139 /**
140 * Create rule strings that represent this rule set.
141 * @param result string to receive the rule strings. Current
142 * contents will be deleted.
143 * @param escapeUnprintable if TRUE, unprintable characters are escaped
144 * @return A reference to 'result'.
145 */
146 virtual UnicodeString& toRules(UnicodeString& result,
147 UBool escapeUnprintable) const;
148
149 /**
150 * Return the set of all characters that may be modified
151 * (getTarget=false) or emitted (getTarget=true) by this set.
 * @param result the set that receives the characters. Whether its
 * previous contents are cleared first is not visible in this header --
 * confirm against the implementation.
 * @param getTarget false to compute the characters that may be
 * modified, true to compute the characters that may be emitted
 * @return a UnicodeSet reference; presumably 'result', by analogy with
 * toRules() -- confirm against the implementation.
152 */
153 UnicodeSet& getSourceTargetSet(UnicodeSet& result,
154 UBool getTarget) const;
155
156private:
157
158 TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // private: forbids assignment by client code (copy construction remains public, see the copy constructor above)
159};
160
161U_NAMESPACE_END
162
163#endif /* #if !UCONFIG_NO_TRANSLITERATION */
164
165#endif