]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbt.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / rbt.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
46f4442e 3* Copyright (C) 1999-2008, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 11/17/99 aliu Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/rep.h"
16#include "unicode/uniset.h"
17#include "rbt_pars.h"
18#include "rbt_data.h"
19#include "rbt_rule.h"
20#include "rbt.h"
374ca955 21#include "umutex.h"
b75a7d8f
A
22
23U_NAMESPACE_BEGIN
24
374ca955
A
25UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
26
27static UMTX transliteratorDataMutex = NULL;
28static Replaceable *gLockedText = NULL;
b75a7d8f
A
29
30void RuleBasedTransliterator::_construct(const UnicodeString& rules,
31 UTransDirection direction,
32 UParseError& parseError,
33 UErrorCode& status) {
374ca955 34 fData = 0;
b75a7d8f
A
35 isDataOwned = TRUE;
36 if (U_FAILURE(status)) {
37 return;
38 }
39
73c04bcf 40 TransliteratorParser parser(status);
b75a7d8f
A
41 parser.parse(rules, direction, parseError, status);
42 if (U_FAILURE(status)) {
43 return;
44 }
45
73c04bcf
A
46 if (parser.idBlockVector.size() != 0 ||
47 parser.compoundFilter != NULL ||
48 parser.dataVector.size() == 0) {
b75a7d8f
A
49 status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
50 return;
51 }
52
73c04bcf 53 fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
374ca955
A
54 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
55}
56
57/**
58 * Constructs a new transliterator from the given rules.
59 * @param id the id for the transliterator.
60 * @param rules rules, separated by ';'
61 * @param direction either FORWARD or REVERSE.
62 * @param adoptedFilter the filter for this transliterator.
63 * @param parseError Struct to recieve information on position
64 * of error if an error is encountered
65 * @param status Output param set to success/failure code.
66 * @exception IllegalArgumentException if rules are malformed
67 * or direction is invalid.
68 */
69RuleBasedTransliterator::RuleBasedTransliterator(
70 const UnicodeString& id,
71 const UnicodeString& rules,
72 UTransDirection direction,
73 UnicodeFilter* adoptedFilter,
74 UParseError& parseError,
75 UErrorCode& status) :
76 Transliterator(id, adoptedFilter) {
77 _construct(rules, direction,parseError,status);
78}
79
80/**
81 * Constructs a new transliterator from the given rules.
82 * @param id the id for the transliterator.
83 * @param rules rules, separated by ';'
84 * @param direction either FORWARD or REVERSE.
85 * @param adoptedFilter the filter for this transliterator.
86 * @param status Output param set to success/failure code.
87 * @exception IllegalArgumentException if rules are malformed
88 * or direction is invalid.
89 */
46f4442e 90/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
91 const UnicodeString& id,
92 const UnicodeString& rules,
93 UTransDirection direction,
94 UnicodeFilter* adoptedFilter,
95 UErrorCode& status) :
96 Transliterator(id, adoptedFilter) {
97 UParseError parseError;
98 _construct(rules, direction,parseError, status);
46f4442e 99}*/
374ca955
A
100
101/**
102 * Convenience constructor with no filter.
103 */
46f4442e 104/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
105 const UnicodeString& id,
106 const UnicodeString& rules,
107 UTransDirection direction,
108 UErrorCode& status) :
109 Transliterator(id, 0) {
110 UParseError parseError;
111 _construct(rules, direction,parseError, status);
46f4442e 112}*/
374ca955
A
113
114/**
115 * Convenience constructor with no filter and FORWARD direction.
116 */
46f4442e 117/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
118 const UnicodeString& id,
119 const UnicodeString& rules,
120 UErrorCode& status) :
121 Transliterator(id, 0) {
122 UParseError parseError;
123 _construct(rules, UTRANS_FORWARD, parseError, status);
46f4442e 124}*/
374ca955
A
125
126/**
127 * Convenience constructor with FORWARD direction.
128 */
46f4442e 129/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
130 const UnicodeString& id,
131 const UnicodeString& rules,
132 UnicodeFilter* adoptedFilter,
133 UErrorCode& status) :
134 Transliterator(id, adoptedFilter) {
135 UParseError parseError;
136 _construct(rules, UTRANS_FORWARD,parseError, status);
46f4442e 137}*/
b75a7d8f
A
138
139RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
140 const TransliterationRuleData* theData,
141 UnicodeFilter* adoptedFilter) :
142 Transliterator(id, adoptedFilter),
374ca955 143 fData((TransliterationRuleData*)theData), // cast away const
b75a7d8f 144 isDataOwned(FALSE) {
374ca955 145 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
b75a7d8f
A
146}
147
148/**
149 * Internal constructor.
150 */
151RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
152 TransliterationRuleData* theData,
153 UBool isDataAdopted) :
154 Transliterator(id, 0),
374ca955 155 fData(theData),
b75a7d8f 156 isDataOwned(isDataAdopted) {
374ca955 157 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
b75a7d8f
A
158}
159
160/**
161 * Copy constructor.
162 */
163RuleBasedTransliterator::RuleBasedTransliterator(
164 const RuleBasedTransliterator& other) :
374ca955 165 Transliterator(other), fData(other.fData),
b75a7d8f
A
166 isDataOwned(other.isDataOwned) {
167
168 // The data object may or may not be owned. If it is not owned we
169 // share it; it is invariant. If it is owned, it's still
170 // invariant, but we need to copy it to prevent double-deletion.
171 // If this becomes a performance issue (if people do a lot of RBT
172 // copying -- unlikely) we can reference count the data object.
173
174 // Only do a deep copy if this is owned data, that is, data that
175 // will be later deleted. System transliterators contain
176 // non-owned data.
177 if (isDataOwned) {
374ca955 178 fData = new TransliterationRuleData(*other.fData);
b75a7d8f
A
179 }
180}
181
182/**
183 * Destructor.
184 */
185RuleBasedTransliterator::~RuleBasedTransliterator() {
186 // Delete the data object only if we own it.
187 if (isDataOwned) {
374ca955 188 delete fData;
b75a7d8f
A
189 }
190}
191
192Transliterator* // Covariant return NOT ALLOWED (for portability)
193RuleBasedTransliterator::clone(void) const {
194 return new RuleBasedTransliterator(*this);
195}
196
197/**
198 * Implements {@link Transliterator#handleTransliterate}.
199 */
200void
201RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
202 UBool isIncremental) const {
203 /* We keep contextStart and contextLimit fixed the entire time,
204 * relative to the text -- contextLimit may move numerically if
205 * text is inserted or removed. The start offset moves toward
206 * limit, with replacements happening under it.
207 *
208 * Example: rules 1. ab>x|y
209 * 2. yc>z
210 *
211 * |eabcd begin - no match, advance start
212 * e|abcd match rule 1 - change text & adjust start
213 * ex|ycd match rule 2 - change text & adjust start
214 * exz|d no match, advance start
215 * exzd| done
216 */
217
218 /* A rule like
219 * a>b|a
220 * creates an infinite loop. To prevent that, we put an arbitrary
221 * limit on the number of iterations that we take, one that is
222 * high enough that any reasonable rules are ok, but low enough to
223 * prevent a server from hanging. The limit is 16 times the
224 * number of characters n, unless n is so large that 16n exceeds a
225 * uint32_t.
226 */
227 uint32_t loopCount = 0;
228 uint32_t loopLimit = index.limit - index.start;
229 if (loopLimit >= 0x10000000) {
230 loopLimit = 0xFFFFFFFF;
231 } else {
232 loopLimit <<= 4;
233 }
234
374ca955
A
235 // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent
236 // operations must be prevented.
237 // A Complication: compound transliterators can result in recursive entries to this
238 // function, sometimes with different "This" objects, always with the same text.
239 // Double-locking must be prevented in these cases.
240 //
241
242 // If the transliteration data is exclusively owned by this transliterator object,
243 // we don't need to do any locking. No sharing between transliterators is possible,
244 // so no concurrent access from multiple threads is possible.
245 UBool lockedMutexAtThisLevel = FALSE;
246 if (isDataOwned == FALSE) {
46f4442e
A
247 // Test whether this request is operating on the same text string as some
248 // some other transliteration that is still in progress and holding the
249 // transliteration mutex. If so, do not lock the transliteration
250 // mutex again.
251 UBool needToLock;
252 UMTX_CHECK(NULL, (&text != gLockedText), needToLock);
374ca955
A
253 if (needToLock) {
254 umtx_lock(&transliteratorDataMutex);
255 gLockedText = &text;
256 lockedMutexAtThisLevel = TRUE;
257 }
258 }
259
46f4442e
A
260 // Check to make sure we don't dereference a null pointer.
261 if (fData != NULL) {
262 while (index.start < index.limit &&
263 loopCount <= loopLimit &&
264 fData->ruleSet.transliterate(text, index, isIncremental)) {
265 ++loopCount;
266 }
b75a7d8f 267 }
374ca955
A
268 if (lockedMutexAtThisLevel) {
269 gLockedText = NULL;
270 umtx_unlock(&transliteratorDataMutex);
271 }
b75a7d8f
A
272}
273
274UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
275 UBool escapeUnprintable) const {
374ca955 276 return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
b75a7d8f
A
277}
278
279/**
280 * Implement Transliterator framework
281 */
282void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
374ca955 283 fData->ruleSet.getSourceTargetSet(result, FALSE);
b75a7d8f
A
284}
285
286/**
287 * Override Transliterator framework
288 */
289UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
374ca955 290 return fData->ruleSet.getSourceTargetSet(result, TRUE);
b75a7d8f
A
291}
292
293U_NAMESPACE_END
294
295#endif /* #if !UCONFIG_NO_TRANSLITERATION */