]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbt.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / rbt.cpp
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
2ca993e8 3* Copyright (C) 1999-2015, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Date Name Description
7* 11/17/99 aliu Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_TRANSLITERATION
14
15#include "unicode/rep.h"
16#include "unicode/uniset.h"
17#include "rbt_pars.h"
18#include "rbt_data.h"
19#include "rbt_rule.h"
20#include "rbt.h"
2ca993e8 21#include "mutex.h"
374ca955 22#include "umutex.h"
b75a7d8f
A
23
24U_NAMESPACE_BEGIN
25
374ca955
A
26UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
27
51004dcb 28static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
374ca955 29static Replaceable *gLockedText = NULL;
b75a7d8f
A
30
31void RuleBasedTransliterator::_construct(const UnicodeString& rules,
32 UTransDirection direction,
33 UParseError& parseError,
34 UErrorCode& status) {
374ca955 35 fData = 0;
b75a7d8f
A
36 isDataOwned = TRUE;
37 if (U_FAILURE(status)) {
38 return;
39 }
40
73c04bcf 41 TransliteratorParser parser(status);
b75a7d8f
A
42 parser.parse(rules, direction, parseError, status);
43 if (U_FAILURE(status)) {
44 return;
45 }
46
73c04bcf
A
47 if (parser.idBlockVector.size() != 0 ||
48 parser.compoundFilter != NULL ||
49 parser.dataVector.size() == 0) {
b75a7d8f
A
50 status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
51 return;
52 }
53
73c04bcf 54 fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
374ca955
A
55 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
56}
57
58/**
59 * Constructs a new transliterator from the given rules.
60 * @param id the id for the transliterator.
61 * @param rules rules, separated by ';'
62 * @param direction either FORWARD or REVERSE.
63 * @param adoptedFilter the filter for this transliterator.
64 * @param parseError Struct to recieve information on position
65 * of error if an error is encountered
66 * @param status Output param set to success/failure code.
67 * @exception IllegalArgumentException if rules are malformed
68 * or direction is invalid.
69 */
70RuleBasedTransliterator::RuleBasedTransliterator(
71 const UnicodeString& id,
72 const UnicodeString& rules,
73 UTransDirection direction,
74 UnicodeFilter* adoptedFilter,
75 UParseError& parseError,
76 UErrorCode& status) :
77 Transliterator(id, adoptedFilter) {
78 _construct(rules, direction,parseError,status);
79}
80
81/**
82 * Constructs a new transliterator from the given rules.
83 * @param id the id for the transliterator.
84 * @param rules rules, separated by ';'
85 * @param direction either FORWARD or REVERSE.
86 * @param adoptedFilter the filter for this transliterator.
87 * @param status Output param set to success/failure code.
88 * @exception IllegalArgumentException if rules are malformed
89 * or direction is invalid.
90 */
46f4442e 91/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
92 const UnicodeString& id,
93 const UnicodeString& rules,
94 UTransDirection direction,
95 UnicodeFilter* adoptedFilter,
96 UErrorCode& status) :
97 Transliterator(id, adoptedFilter) {
98 UParseError parseError;
99 _construct(rules, direction,parseError, status);
46f4442e 100}*/
374ca955
A
101
102/**
103 * Convenience constructor with no filter.
104 */
46f4442e 105/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
106 const UnicodeString& id,
107 const UnicodeString& rules,
108 UTransDirection direction,
109 UErrorCode& status) :
110 Transliterator(id, 0) {
111 UParseError parseError;
112 _construct(rules, direction,parseError, status);
46f4442e 113}*/
374ca955
A
114
115/**
116 * Convenience constructor with no filter and FORWARD direction.
117 */
46f4442e 118/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
119 const UnicodeString& id,
120 const UnicodeString& rules,
121 UErrorCode& status) :
122 Transliterator(id, 0) {
123 UParseError parseError;
124 _construct(rules, UTRANS_FORWARD, parseError, status);
46f4442e 125}*/
374ca955
A
126
127/**
128 * Convenience constructor with FORWARD direction.
129 */
46f4442e 130/*RuleBasedTransliterator::RuleBasedTransliterator(
374ca955
A
131 const UnicodeString& id,
132 const UnicodeString& rules,
133 UnicodeFilter* adoptedFilter,
134 UErrorCode& status) :
135 Transliterator(id, adoptedFilter) {
136 UParseError parseError;
137 _construct(rules, UTRANS_FORWARD,parseError, status);
46f4442e 138}*/
b75a7d8f
A
139
140RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
141 const TransliterationRuleData* theData,
142 UnicodeFilter* adoptedFilter) :
143 Transliterator(id, adoptedFilter),
374ca955 144 fData((TransliterationRuleData*)theData), // cast away const
b75a7d8f 145 isDataOwned(FALSE) {
374ca955 146 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
b75a7d8f
A
147}
148
149/**
150 * Internal constructor.
151 */
152RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& id,
153 TransliterationRuleData* theData,
154 UBool isDataAdopted) :
155 Transliterator(id, 0),
374ca955 156 fData(theData),
b75a7d8f 157 isDataOwned(isDataAdopted) {
374ca955 158 setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
b75a7d8f
A
159}
160
161/**
162 * Copy constructor.
163 */
164RuleBasedTransliterator::RuleBasedTransliterator(
165 const RuleBasedTransliterator& other) :
374ca955 166 Transliterator(other), fData(other.fData),
b75a7d8f
A
167 isDataOwned(other.isDataOwned) {
168
169 // The data object may or may not be owned. If it is not owned we
170 // share it; it is invariant. If it is owned, it's still
171 // invariant, but we need to copy it to prevent double-deletion.
172 // If this becomes a performance issue (if people do a lot of RBT
173 // copying -- unlikely) we can reference count the data object.
174
175 // Only do a deep copy if this is owned data, that is, data that
176 // will be later deleted. System transliterators contain
177 // non-owned data.
178 if (isDataOwned) {
374ca955 179 fData = new TransliterationRuleData(*other.fData);
b75a7d8f
A
180 }
181}
182
183/**
184 * Destructor.
185 */
186RuleBasedTransliterator::~RuleBasedTransliterator() {
187 // Delete the data object only if we own it.
188 if (isDataOwned) {
374ca955 189 delete fData;
b75a7d8f
A
190 }
191}
192
193Transliterator* // Covariant return NOT ALLOWED (for portability)
194RuleBasedTransliterator::clone(void) const {
195 return new RuleBasedTransliterator(*this);
196}
197
198/**
199 * Implements {@link Transliterator#handleTransliterate}.
200 */
201void
202RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
203 UBool isIncremental) const {
204 /* We keep contextStart and contextLimit fixed the entire time,
205 * relative to the text -- contextLimit may move numerically if
206 * text is inserted or removed. The start offset moves toward
207 * limit, with replacements happening under it.
208 *
209 * Example: rules 1. ab>x|y
210 * 2. yc>z
211 *
212 * |eabcd begin - no match, advance start
213 * e|abcd match rule 1 - change text & adjust start
214 * ex|ycd match rule 2 - change text & adjust start
215 * exz|d no match, advance start
216 * exzd| done
217 */
218
219 /* A rule like
220 * a>b|a
221 * creates an infinite loop. To prevent that, we put an arbitrary
222 * limit on the number of iterations that we take, one that is
223 * high enough that any reasonable rules are ok, but low enough to
224 * prevent a server from hanging. The limit is 16 times the
225 * number of characters n, unless n is so large that 16n exceeds a
226 * uint32_t.
227 */
228 uint32_t loopCount = 0;
229 uint32_t loopLimit = index.limit - index.start;
230 if (loopLimit >= 0x10000000) {
231 loopLimit = 0xFFFFFFFF;
232 } else {
233 loopLimit <<= 4;
234 }
235
374ca955
A
236 // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent
237 // operations must be prevented.
238 // A Complication: compound transliterators can result in recursive entries to this
239 // function, sometimes with different "This" objects, always with the same text.
240 // Double-locking must be prevented in these cases.
241 //
242
374ca955 243 UBool lockedMutexAtThisLevel = FALSE;
2ca993e8
A
244
245 // Test whether this request is operating on the same text string as
246 // some other transliteration that is still in progress and holding the
247 // transliteration mutex. If so, do not lock the transliteration
248 // mutex again.
249 //
250 // gLockedText variable is protected by the global ICU mutex.
251 // Shared RBT data protected by transliteratorDataMutex.
252 //
253 // TODO(andy): Need a better scheme for handling this.
254 UBool needToLock;
255 {
256 Mutex m;
57a6839d 257 needToLock = (&text != gLockedText);
2ca993e8
A
258 }
259 if (needToLock) {
260 umtx_lock(&transliteratorDataMutex); // Contention, longish waits possible here.
261 Mutex m;
262 gLockedText = &text;
263 lockedMutexAtThisLevel = TRUE;
374ca955
A
264 }
265
46f4442e
A
266 // Check to make sure we don't dereference a null pointer.
267 if (fData != NULL) {
268 while (index.start < index.limit &&
269 loopCount <= loopLimit &&
270 fData->ruleSet.transliterate(text, index, isIncremental)) {
271 ++loopCount;
272 }
b75a7d8f 273 }
374ca955 274 if (lockedMutexAtThisLevel) {
2ca993e8
A
275 {
276 Mutex m;
277 gLockedText = NULL;
278 }
374ca955
A
279 umtx_unlock(&transliteratorDataMutex);
280 }
b75a7d8f
A
281}
282
283UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
284 UBool escapeUnprintable) const {
374ca955 285 return fData->ruleSet.toRules(rulesSource, escapeUnprintable);
b75a7d8f
A
286}
287
288/**
289 * Implement Transliterator framework
290 */
291void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
374ca955 292 fData->ruleSet.getSourceTargetSet(result, FALSE);
b75a7d8f
A
293}
294
295/**
296 * Override Transliterator framework
297 */
298UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
374ca955 299 return fData->ruleSet.getSourceTargetSet(result, TRUE);
b75a7d8f
A
300}
301
302U_NAMESPACE_END
303
304#endif /* #if !UCONFIG_NO_TRANSLITERATION */