2 **********************************************************************
3 * Copyright (C) 1999-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/17/99 aliu Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION
15 #include "unicode/rep.h"
16 #include "unicode/uniset.h"
// Macro invocation that, per its name, supplies the RTTI implementation for
// RuleBasedTransliterator.
26 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator
)
// File-local mutex that handleTransliterate() locks and unlocks around its
// rule-application loop; a comment in that function says it protects shared RBT data.
28 static UMutex transliteratorDataMutex
= U_MUTEX_INITIALIZER
;
// File-local pointer that handleTransliterate() compares against the address of
// its text argument to decide whether locking is needed. Starts out NULL.
// NOTE(review): the code that assigns it is not visible in this extract.
29 static Replaceable
*gLockedText
= NULL
;
// Initialization helper called by the rule-string constructor.
// Parses rules for the given direction with a TransliteratorParser and, when the
// parse result is acceptable, stores element 0 of parser.dataVector in fData.
//   rules      - rule text to parse
//   direction  - direction passed through to the parser
//   parseError - passed through to the parser
// NOTE(review): the trailing status parameter, any member initialization, and the
// bodies of the failure branches are not visible in this extract; the comments
// below describe only the statements that are shown.
31 void RuleBasedTransliterator::_construct(const UnicodeString
& rules
,
32 UTransDirection direction
,
33 UParseError
& parseError
,
// Check for a failure status before doing any work (branch body not visible).
37 if (U_FAILURE(status
)) {
// Build the parser and run it over the rule text.
41 TransliteratorParser
parser(status
);
42 parser
.parse(rules
, direction
, parseError
, status
);
// Check whether parsing failed (branch body not visible).
43 if (U_FAILURE(status
)) {
// Reject the parse result if it contains any ID block, has a compound filter,
// or produced no rule data at all.
47 if (parser
.idBlockVector
.size() != 0 ||
48 parser
.compoundFilter
!= NULL
||
49 parser
.dataVector
.size() == 0) {
50 status
= U_INVALID_RBT_SYNTAX
; // ::ID blocks disallowed in RBT
// Take element 0 out of the parser data vector via orphanElementAt and keep it
// as the rule data of this object.
54 fData
= (TransliterationRuleData
*)parser
.dataVector
.orphanElementAt(0);
// Propagate the maximum context length of the rule set to this transliterator.
55 setMaximumContextLength(fData
->ruleSet
.getMaximumContextLength());
59 * Constructs a new transliterator from the given rules.
60 * @param id the id for the transliterator.
61 * @param rules rules, separated by ';'
62 * @param direction either FORWARD or REVERSE.
63 * @param adoptedFilter the filter for this transliterator.
64 * @param parseError Struct to receive information on position
65 * of error if an error is encountered
66 * @param status Output param set to success/failure code.
67 * @exception IllegalArgumentException if rules are malformed
68 * or direction is invalid.
// Rule-string constructor: passes id and adoptedFilter to the Transliterator base
// class, then hands rules, direction, parseError and status to _construct().
// NOTE(review): the line declaring the status parameter is not visible in this
// extract.
70 RuleBasedTransliterator::RuleBasedTransliterator(
71 const UnicodeString
& id
,
72 const UnicodeString
& rules
,
73 UTransDirection direction
,
74 UnicodeFilter
* adoptedFilter
,
75 UParseError
& parseError
,
77 Transliterator(id
, adoptedFilter
) {
// All parsing and member setup is delegated to the shared helper.
78 _construct(rules
, direction
,parseError
,status
);
82 * Constructs a new transliterator from the given rules.
83 * @param id the id for the transliterator.
84 * @param rules rules, separated by ';'
85 * @param direction either FORWARD or REVERSE.
86 * @param adoptedFilter the filter for this transliterator.
87 * @param status Output param set to success/failure code.
88 * @exception IllegalArgumentException if rules are malformed
89 * or direction is invalid.
91 /*RuleBasedTransliterator::RuleBasedTransliterator(
92 const UnicodeString& id,
93 const UnicodeString& rules,
94 UTransDirection direction,
95 UnicodeFilter* adoptedFilter,
97 Transliterator(id, adoptedFilter) {
98 UParseError parseError;
99 _construct(rules, direction,parseError, status);
103 * Convenience constructor with no filter.
105 /*RuleBasedTransliterator::RuleBasedTransliterator(
106 const UnicodeString& id,
107 const UnicodeString& rules,
108 UTransDirection direction,
109 UErrorCode& status) :
110 Transliterator(id, 0) {
111 UParseError parseError;
112 _construct(rules, direction,parseError, status);
116 * Convenience constructor with no filter and FORWARD direction.
118 /*RuleBasedTransliterator::RuleBasedTransliterator(
119 const UnicodeString& id,
120 const UnicodeString& rules,
121 UErrorCode& status) :
122 Transliterator(id, 0) {
123 UParseError parseError;
124 _construct(rules, UTRANS_FORWARD, parseError, status);
128 * Convenience constructor with FORWARD direction.
130 /*RuleBasedTransliterator::RuleBasedTransliterator(
131 const UnicodeString& id,
132 const UnicodeString& rules,
133 UnicodeFilter* adoptedFilter,
134 UErrorCode& status) :
135 Transliterator(id, adoptedFilter) {
136 UParseError parseError;
137 _construct(rules, UTRANS_FORWARD,parseError, status);
// Constructs a transliterator over already-built rule data.
// The base class receives id and adoptedFilter; fData is set to theData with the
// const qualifier cast away.
// NOTE(review): the rest of the initializer list is not visible in this extract,
// so the ownership flag chosen for theData cannot be confirmed from here.
140 RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString
& id
,
141 const TransliterationRuleData
* theData
,
142 UnicodeFilter
* adoptedFilter
) :
143 Transliterator(id
, adoptedFilter
),
144 fData((TransliterationRuleData
*)theData
), // cast away const
// Propagate the maximum context length of the rule set to this transliterator.
146 setMaximumContextLength(fData
->ruleSet
.getMaximumContextLength());
150 * Internal constructor.
// Internal constructor: builds a transliterator with no filter (the base class is
// given 0 for the filter) and records isDataAdopted in isDataOwned.
// NOTE(review): the initializer that stores theData in fData is not visible in
// this extract; the body below dereferences fData, so theData is presumably
// expected to be non-null - confirm against callers.
152 RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString
& id
,
153 TransliterationRuleData
* theData
,
154 UBool isDataAdopted
) :
155 Transliterator(id
, 0),
157 isDataOwned(isDataAdopted
) {
// Propagate the maximum context length of the rule set to this transliterator.
158 setMaximumContextLength(fData
->ruleSet
.getMaximumContextLength());
// Copy constructor: copies the base-class state, the fData pointer and the
// isDataOwned flag from other, then replaces fData with a newly allocated copy
// of the rule data of other.
// NOTE(review): the existing comments say the deep copy applies only to owned
// data, but the condition guarding it is not visible in this extract.
164 RuleBasedTransliterator::RuleBasedTransliterator(
165 const RuleBasedTransliterator
& other
) :
166 Transliterator(other
), fData(other
.fData
),
167 isDataOwned(other
.isDataOwned
) {
169 // The data object may or may not be owned. If it is not owned we
170 // share it; it is invariant. If it is owned, it's still
171 // invariant, but we need to copy it to prevent double-deletion.
172 // If this becomes a performance issue (if people do a lot of RBT
173 // copying -- unlikely) we can reference count the data object.
175 // Only do a deep copy if this is owned data, that is, data that
176 // will be later deleted. System transliterators contain
// Deep copy through the TransliterationRuleData copy constructor.
179 fData
= new TransliterationRuleData(*other
.fData
);
// Destructor.
// NOTE(review): only the comment below is visible in this extract; the statement
// that releases fData is not shown.
186 RuleBasedTransliterator::~RuleBasedTransliterator() {
187 // Delete the data object only if we own it.
// Returns a newly allocated copy of this object, created with the copy
// constructor. The declared return type is the base-class pointer
// Transliterator* rather than a covariant type.
193 Transliterator
* // Covariant return NOT ALLOWED (for portability)
194 RuleBasedTransliterator::clone(void) const {
195 return new RuleBasedTransliterator(*this);
199 * Implements {@link Transliterator#handleTransliterate}.
// Runs the rule set over the text between index.start and index.limit.
// The loop shown below keeps calling fData->ruleSet.transliterate(text, index,
// isIncremental) while index.start is below index.limit, the iteration budget
// (loopCount against loopLimit) is not exceeded, and the call returns true.
// Around that loop the function locks transliteratorDataMutex and later unlocks
// it when lockedMutexAtThisLevel is set; needToLock is computed by comparing the
// address of text with gLockedText.
// NOTE(review): the return type, the conditions wrapping the lock and unlock
// calls, the loop body, and the scaling of loopLimit are not visible in this
// extract; the comments describe only the statements that are shown.
202 RuleBasedTransliterator::handleTransliterate(Replaceable
& text
, UTransPosition
& index
,
203 UBool isIncremental
) const {
204 /* We keep contextStart and contextLimit fixed the entire time,
205 * relative to the text -- contextLimit may move numerically if
206 * text is inserted or removed. The start offset moves toward
207 * limit, with replacements happening under it.
209 * Example: rules 1. ab>x|y
212 * |eabcd begin - no match, advance start
213 * e|abcd match rule 1 - change text & adjust start
214 * ex|ycd match rule 2 - change text & adjust start
215 * exz|d no match, advance start
221 * creates an infinite loop. To prevent that, we put an arbitrary
222 * limit on the number of iterations that we take, one that is
223 * high enough that any reasonable rules are ok, but low enough to
224 * prevent a server from hanging. The limit is 16 times the
225 * number of characters n, unless n is so large that 16n exceeds a
// Iteration counter and budget; the budget starts as the length of the range.
228 uint32_t loopCount
= 0;
229 uint32_t loopLimit
= index
.limit
- index
.start
;
// For ranges of 0x10000000 or more the budget is set to 0xFFFFFFFF.
230 if (loopLimit
>= 0x10000000) {
231 loopLimit
= 0xFFFFFFFF;
236 // Transliterator locking. Rule-based Transliterators are not thread safe; concurrent
237 // operations must be prevented.
238 // A Complication: compound transliterators can result in recursive entries to this
239 // function, sometimes with different "This" objects, always with the same text.
240 // Double-locking must be prevented in these cases.
// Remembers whether this invocation took the mutex, so that only this
// invocation releases it.
243 UBool lockedMutexAtThisLevel
= FALSE
;
245 // Test whether this request is operating on the same text string as
246 // some other transliteration that is still in progress and holding the
247 // transliteration mutex. If so, do not lock the transliteration
250 // gLockedText variable is protected by the global ICU mutex.
251 // Shared RBT data protected by transliteratorDataMutex.
253 // TODO(andy): Need a better scheme for handling this.
// Locking is needed only when text is not the object recorded in gLockedText.
257 needToLock
= (&text
!= gLockedText
);
260 umtx_lock(&transliteratorDataMutex
); // Contention, longish waits possible here.
263 lockedMutexAtThisLevel
= TRUE
;
266 // Check to make sure we don't dereference a null pointer.
// Main loop: stop when the range is consumed, the budget is exceeded, or the
// rule set reports false.
268 while (index
.start
< index
.limit
&&
269 loopCount
<= loopLimit
&&
270 fData
->ruleSet
.transliterate(text
, index
, isIncremental
)) {
// Release the mutex only if it was taken by this invocation.
274 if (lockedMutexAtThisLevel
) {
279 umtx_unlock(&transliteratorDataMutex
);
// Produces the rule text for this transliterator by delegating to
// fData->ruleSet.toRules(rulesSource, escapeUnprintable) and returning the
// reference that call yields.
//   rulesSource       - string object handed to the rule set
//   escapeUnprintable - flag handed through unchanged to the rule set
283 UnicodeString
& RuleBasedTransliterator::toRules(UnicodeString
& rulesSource
,
284 UBool escapeUnprintable
) const {
285 return fData
->ruleSet
.toRules(rulesSource
, escapeUnprintable
);
289 * Implement Transliterator framework
// Fills result by delegating to fData->ruleSet.getSourceTargetSet with the flag
// FALSE; the value returned by that call is discarded.
// NOTE(review): FALSE presumably selects the source side (getTargetSet passes
// TRUE) - confirm against the rule set implementation.
291 void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet
& result
) const {
292 fData
->ruleSet
.getSourceTargetSet(result
, FALSE
);
296 * Override Transliterator framework
// Returns the reference produced by fData->ruleSet.getSourceTargetSet(result,
// TRUE).
// NOTE(review): TRUE presumably selects the target side (handleGetSourceSet
// passes FALSE) - confirm against the rule set implementation.
298 UnicodeSet
& RuleBasedTransliterator::getTargetSet(UnicodeSet
& result
) const {
299 return fData
->ruleSet
.getSourceTargetSet(result
, TRUE
);
304 #endif /* #if !UCONFIG_NO_TRANSLITERATION */