]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbt.cpp
ICU-6.2.9.tar.gz
[apple/icu.git] / icuSources / i18n / rbt.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
374ca955 3* Copyright (C) 1999-2004, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 11/17/99 aliu Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/rep.h"
16#include "unicode/uniset.h"
17#include "rbt_pars.h"
18#include "rbt_data.h"
19#include "rbt_rule.h"
20#include "rbt.h"
374ca955 21#include "umutex.h"
b75a7d8f
A
22
23U_NAMESPACE_BEGIN
24
374ca955
A
25UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
26
27static UMTX transliteratorDataMutex = NULL;
28static Replaceable *gLockedText = NULL;
b75a7d8f
A
29
30void RuleBasedTransliterator::_construct(const UnicodeString& rules,
31 UTransDirection direction,
32 UParseError& parseError,
33 UErrorCode& status) {
374ca955 34 fData = 0;
b75a7d8f
A
35 isDataOwned = TRUE;
36 if (U_FAILURE(status)) {
37 return;
38 }
39
40 TransliteratorParser parser;
41 parser.parse(rules, direction, parseError, status);
42 if (U_FAILURE(status)) {
43 return;
44 }
45
46 if (parser.idBlock.length() != 0 ||
47 parser.compoundFilter != NULL) {
48 status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
49 return;
50 }
51
374ca955
A
52 fData = parser.orphanData();
53 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
54}
55
56/**
57 * Constructs a new transliterator from the given rules.
58 * @param id the id for the transliterator.
59 * @param rules rules, separated by ';'
60 * @param direction either FORWARD or REVERSE.
61 * @param adoptedFilter the filter for this transliterator.
62 * @param parseError Struct to recieve information on position
63 * of error if an error is encountered
64 * @param status Output param set to success/failure code.
65 * @exception IllegalArgumentException if rules are malformed
66 * or direction is invalid.
67 */
68RuleBasedTransliterator::RuleBasedTransliterator(
69 const UnicodeString& id,
70 const UnicodeString& rules,
71 UTransDirection direction,
72 UnicodeFilter* adoptedFilter,
73 UParseError& parseError,
74 UErrorCode& status) :
75 Transliterator(id, adoptedFilter) {
76 _construct(rules, direction,parseError,status);
77}
78
79/**
80 * Constructs a new transliterator from the given rules.
81 * @param id the id for the transliterator.
82 * @param rules rules, separated by ';'
83 * @param direction either FORWARD or REVERSE.
84 * @param adoptedFilter the filter for this transliterator.
85 * @param status Output param set to success/failure code.
86 * @exception IllegalArgumentException if rules are malformed
87 * or direction is invalid.
88 */
89RuleBasedTransliterator::RuleBasedTransliterator(
90 const UnicodeString& id,
91 const UnicodeString& rules,
92 UTransDirection direction,
93 UnicodeFilter* adoptedFilter,
94 UErrorCode& status) :
95 Transliterator(id, adoptedFilter) {
96 UParseError parseError;
97 _construct(rules, direction,parseError, status);
98}
99
100/**
101 * Covenience constructor with no filter.
102 */
103RuleBasedTransliterator::RuleBasedTransliterator(
104 const UnicodeString& id,
105 const UnicodeString& rules,
106 UTransDirection direction,
107 UErrorCode& status) :
108 Transliterator(id, 0) {
109 UParseError parseError;
110 _construct(rules, direction,parseError, status);
111}
112
113/**
114 * Covenience constructor with no filter and FORWARD direction.
115 */
116RuleBasedTransliterator::RuleBasedTransliterator(
117 const UnicodeString& id,
118 const UnicodeString& rules,
119 UErrorCode& status) :
120 Transliterator(id, 0) {
121 UParseError parseError;
122 _construct(rules, UTRANS_FORWARD, parseError, status);
123}
124
125/**
126 * Covenience constructor with FORWARD direction.
127 */
128RuleBasedTransliterator::RuleBasedTransliterator(
129 const UnicodeString& id,
130 const UnicodeString& rules,
131 UnicodeFilter* adoptedFilter,
132 UErrorCode& status) :
133 Transliterator(id, adoptedFilter) {
134 UParseError parseError;
135 _construct(rules, UTRANS_FORWARD,parseError, status);
b75a7d8f
A
136}
137
138RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
139 const TransliterationRuleData* theData,
140 UnicodeFilter* adoptedFilter) :
141 Transliterator(id, adoptedFilter),
374ca955 142 fData((TransliterationRuleData*)theData), // cast away const
b75a7d8f 143 isDataOwned(FALSE) {
374ca955 144 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
b75a7d8f
A
145}
146
147/**
148 * Internal constructor.
149 */
150RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
151 TransliterationRuleData* theData,
152 UBool isDataAdopted) :
153 Transliterator(id, 0),
374ca955 154 fData(theData),
b75a7d8f 155 isDataOwned(isDataAdopted) {
374ca955 156 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
b75a7d8f
A
157}
158
159/**
160 * Copy constructor.
161 */
162RuleBasedTransliterator::RuleBasedTransliterator(
163 const RuleBasedTransliterator& other) :
374ca955 164 Transliterator(other), fData(other.fData),
b75a7d8f
A
165 isDataOwned(other.isDataOwned) {
166
167 // The data object may or may not be owned. If it is not owned we
168 // share it; it is invariant. If it is owned, it's still
169 // invariant, but we need to copy it to prevent double-deletion.
170 // If this becomes a performance issue (if people do a lot of RBT
171 // copying -- unlikely) we can reference count the data object.
172
173 // Only do a deep copy if this is owned data, that is, data that
174 // will be later deleted. System transliterators contain
175 // non-owned data.
176 if (isDataOwned) {
374ca955 177 fData = new TransliterationRuleData(*other.fData);
b75a7d8f
A
178 }
179}
180
181/**
182 * Destructor.
183 */
184RuleBasedTransliterator::~RuleBasedTransliterator() {
185 // Delete the data object only if we own it.
186 if (isDataOwned) {
374ca955 187 delete fData;
b75a7d8f
A
188 }
189}
190
191Transliterator* // Covariant return NOT ALLOWED (for portability)
192RuleBasedTransliterator::clone(void) const {
193 return new RuleBasedTransliterator(*this);
194}
195
196/**
197 * Implements {@link Transliterator#handleTransliterate}.
198 */
199void
200RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
201 UBool isIncremental) const {
202 /* We keep contextStart and contextLimit fixed the entire time,
203 * relative to the text -- contextLimit may move numerically if
204 * text is inserted or removed. The start offset moves toward
205 * limit, with replacements happening under it.
206 *
207 * Example: rules 1. ab>x|y
208 * 2. yc>z
209 *
210 * |eabcd begin - no match, advance start
211 * e|abcd match rule 1 - change text & adjust start
212 * ex|ycd match rule 2 - change text & adjust start
213 * exz|d no match, advance start
214 * exzd| done
215 */
216
217 /* A rule like
218 * a>b|a
219 * creates an infinite loop. To prevent that, we put an arbitrary
220 * limit on the number of iterations that we take, one that is
221 * high enough that any reasonable rules are ok, but low enough to
222 * prevent a server from hanging. The limit is 16 times the
223 * number of characters n, unless n is so large that 16n exceeds a
224 * uint32_t.
225 */
226 uint32_t loopCount = 0;
227 uint32_t loopLimit = index.limit - index.start;
228 if (loopLimit >= 0x10000000) {
229 loopLimit = 0xFFFFFFFF;
230 } else {
231 loopLimit <<= 4;
232 }
233
374ca955
A
234 // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent
235 // operations must be prevented.
236 // A Complication: compound transliterators can result in recursive entries to this
237 // function, sometimes with different "This" objects, always with the same text.
238 // Double-locking must be prevented in these cases.
239 //
240
241 // If the transliteration data is exclusively owned by this transliterator object,
242 // we don't need to do any locking. No sharing between transliterators is possible,
243 // so no concurrent access from multiple threads is possible.
244 UBool lockedMutexAtThisLevel = FALSE;
245 if (isDataOwned == FALSE) {
246 umtx_lock(NULL);
247 // Test whether this request is operating on the same text string as some
248 // some other transliteration that is still in progress and holding the
249 // transliteration mutex. If so, do not lock the transliteration
250 // mutex again.
251 UBool needToLock = (&text != gLockedText);
252 umtx_unlock(NULL);
253 if (needToLock) {
254 umtx_lock(&transliteratorDataMutex);
255 gLockedText = &text;
256 lockedMutexAtThisLevel = TRUE;
257 }
258 }
259
260
b75a7d8f
A
261 while (index.start < index.limit &&
262 loopCount <= loopLimit &&
374ca955 263 fData->ruleSet.transliterate(text, index, isIncremental)) {
b75a7d8f
A
264 ++loopCount;
265 }
374ca955
A
266 if (lockedMutexAtThisLevel) {
267 gLockedText = NULL;
268 umtx_unlock(&transliteratorDataMutex);
269 }
b75a7d8f
A
270}
271
272UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
273 UBool escapeUnprintable) const {
374ca955 274 return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
b75a7d8f
A
275}
276
277/**
278 * Implement Transliterator framework
279 */
280void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
374ca955 281 fData->ruleSet.getSourceTargetSet(result, FALSE);
b75a7d8f
A
282}
283
284/**
285 * Override Transliterator framework
286 */
287UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
374ca955 288 return fData->ruleSet.getSourceTargetSet(result, TRUE);
b75a7d8f
A
289}
290
291U_NAMESPACE_END
292
293#endif /* #if !UCONFIG_NO_TRANSLITERATION */